#QC filter, delete nextera adapter and low quality
bash src/03.trimgalore.sh
#Remove adapters
bash src/05.cutadaptASVs were inferred from each sequencing run separately with DADA2 in R
The code for each run is found in:
2022 src/Dada2_2022.html 2023
src/Dada2_2023.html
Import outputs from DADA2 to QIIME2
#Create output results dir
mkdir -p results/02.QIIME2
#Import run 2022 files
#import fasta seqs
qiime tools import --input-path results/01.Dada2/2022/ASVs_2022.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2022.qza
# append missing header to the table for import
cat <(echo -n "#OTU Table") results/01.Dada2/2022/ASV_to_seqs-nochim_2022.tsv > results/02.QIIME2/temp.txt
# convert to biom
biom convert -i results/02.QIIME2/temp.txt -o results/02.QIIME2/temp.biom --table-type="OTU table" --to-hdf5
#create feature table
qiime tools import --input-path results/02.QIIME2/temp.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2022.qza
#Get visual summarizes
qiime feature-table tabulate-seqs \
--i-data results/02.QIIME2/ASV_rep_seq_2022.qza \
--o-visualization results/02.QIIME2/ASV_rep_seq_2022.qzv
qiime feature-table summarize \
--i-table results/02.QIIME2/ASV_table_2022.qza \
--o-visualization results/02.QIIME2/ASV_table_2022.qzv
#Import run 2023 files
#import fasta seqs
qiime tools import --input-path results/01.Dada2/2023/ASVs_2023.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2023.qza
# append missing header to the table for import
cat <(echo -n "#OTU Table") results/01.Dada2/2023/ASV_to_seqs-nochim_2023.tsv > results/02.QIIME2/temp2.txt
# convert to biom
biom convert -i results/02.QIIME2/temp2.txt -o results/02.QIIME2/temp2.biom --table-type="OTU table" --to-hdf5
#create feature table
qiime tools import --input-path results/02.QIIME2/temp2.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2023.qza
#Get visual summarizes
qiime feature-table tabulate-seqs \
--i-data results/02.QIIME2/ASV_rep_seq_2023.qza \
--o-visualization results/02.QIIME2/ASV_rep_seq_2023.qzv
qiime feature-table summarize \
--i-table results/02.QIIME2/ASV_table_2023.qza \
--o-visualization results/02.QIIME2/ASV_table_2023.qzvHere I removed all ASVs with a frequency of less than 0.1% of the mean sample depth. This cut-off excludes ASVs that are likely due to MiSeq bleed-through between runs (reported by Illumina to be 0.1% of reads). To calculate this cut-off I identified the mean sample depth, multiplied it by 0.001, and rounded to the nearest integer. This step are describe in this paper
# 2022
#Filter feature table by frequence
qiime feature-table filter-features --i-table results/02.QIIME2/ASV_table_2022.qza --p-min-samples 1 --p-min-frequency 52 --o-filtered-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza
#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qzv
#Filter rep-seqs 2022
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2022.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza
# 2023
#Filter feature table by frequence
qiime feature-table filter-features --i-table results/02.QIIME2/ASV_table_2023.qza --p-min-samples 1 --p-min-frequency 97 --o-filtered-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza
#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qzv
#Filter rep-seqs 2023
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2023.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza#Merge rep seqs
qiime feature-table merge-seqs --i-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza --o-merged-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza
#Merge feature tables
qiime feature-table merge --i-tables results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --p-overlap-method sum --o-merged-table results/02.QIIME2/merged-table-filters-freq52-97.qza
#Get visual summarizes
qiime feature-table tabulate-seqs --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-visualization results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qzv
qiime feature-table summarize --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --o-visualization results/02.QIIME2/merged-table-filters-freq52-97.qzv#Tax classification
qiime feature-classifier classify-sklearn --i-classifier data/dbs/classifier_silva_138_trained.qza --i-reads results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-classification results/02.QIIME2/taxonomy_merge.qza --p-n-jobs 80
#get visualization
qiime metadata tabulate --m-input-file results/02.QIIME2/taxonomy_merge.qza --o-visualization results/02.QIIME2/taxonomy_merge.qzvqiime taxa filter-table --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --i-taxonomy results/02.QIIME2/taxonomy_merge.qza --p-exclude Archaea,Eukaryota,Mitochondria,Chloroplast --p-include p__ --o-filtered-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --o-visualization results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qzv| Type | ASVs | Frequency | Samples |
|---|---|---|---|
| 2022 | 29302 | 2677260 | 53 |
| 2023 | 19143 | 3035299 | 30 |
| 2022-filter-freq | 6316 | 2410664 | 53 |
| 2023-filter-freq | 2935 | 2754795 | 30 |
| Merge-fltr-freq | 6316 | 5165459 | 83 |
| Mrg-fltr-freq-aemc | 6261 | 5084303 | 83 |
Remove in fasta seqs
#remove in fasta sequences
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qzanohup qiime phylogeny align-to-tree-mafft-iqtree \
--p-n-threads auto --i-sequences results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qza \
--o-alignment results/02.QIIME2/align-mft-iqt.qza \
--o-masked-alignment results/02.QIIME2/masked-align-iqtree.qza \
--o-tree results/02.QIIME2/unrooted-tree-iqtree.qza \
--o-rooted-tree results/02.QIIME2/rooted-tree-iqtree.qza --verbose > outs/07.phylogeny-iqtree.nohup &
[1] 2166700#taxonomy
qiime tools export --input-path results/02.QIIME2/taxonomy_merge.qza --output-path results/02.QIIME2/exports/taxonomy
#feature table
qiime tools export --input-path results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --output-path results/02.QIIME2/exports/ASV_table
#reformat taxonomy tsv
sed -i -e '1 s/Feature/#Feature/' -e '1 s/Taxon/taxonomy/' results/02.QIIME2/exports/taxonomy/taxonomy.tsv
#Add taxonomy to feature table
biom add-metadata -i results/02.QIIME2/exports/ASV_table/feature-table.biom -o results/02.QIIME2/exports/feature-table_tax.biom --observation-metadata-fp results/02.QIIME2/exports/taxonomy/taxonomy.tsv --sc-separated results/02.QIIME2/exports/taxonomy/taxonomy.tsv
#Convert to tsv from biom format
biom convert -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax.tsv --to-tsv --header-key taxonomy
#Get effective reads per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax_reads_per_sample_summary.txt
#Get ASVs per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom --qualitative -o results/02.QIIME2/exports/feature-table_tax_ASVs_per_sample_summary.txtload("src/Diversity.RData")It is recommended to choose specimens that have at least two compartments. This will enable comparison between them.
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Filter
metadata_fltr <- metadata %>%
group_by(Specimen) %>%
filter(n() >= 2) %>%
ungroup()
#save table
write.table(metadata_fltr, "results/03.Diversity/Metadata_fltr.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)Compare
dim(metadata)## [1] 83 13
dim(metadata_fltr)## [1] 72 13
library(ggplot2)
library(ggsci)
library(cowplot)
#all
# factor to reorder plot
metadata$Location <- factor(metadata$Location)
metadata$Source <- factor(metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata$Specimen <- factor(metadata$Specimen)
# plot
samples_plot_all <- ggplot(metadata, aes(x = Specimen, y = ..count.., fill = Source)) +
geom_bar(position = "dodge") +
facet_wrap(~ Location, scales = "free_x") +
labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels
scale_fill_jco()
# fltr
# factor to reorder plot
metadata_fltr$Location <- factor(metadata_fltr$Location)
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata_fltr$Specimen <- factor(metadata_fltr$Specimen)
#plot
samples_plot_fltr <- ggplot(metadata_fltr, aes(x = Specimen, y = ..count.., fill = Source)) +
geom_bar(position = "dodge") +
facet_wrap(~ Location, scales = "free_x") +
labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels
scale_fill_jco()
samples_distribution_plot <- plot_grid(samples_plot_all, samples_plot_fltr, labels = c("A", "B"), ncol = 1, rel_heights = c(1, 1))## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#save plot
ggsave("results/plots/02.diversity/01.samples_distribution.png", samples_distribution_plot , width = 8, height = 7)
# show
samples_distribution_plot# Filter otu table
# Create vector with identifiers of samples to keep
samples_to_keep <- metadata_fltr$`sample-id`
# Filter OTU table
otu_table_fltr <- otu_table[, samples_to_keep, drop = FALSE]
# check
dim(otu_table)## [1] 6261 83
dim(otu_table_fltr)## [1] 6261 72
library(phyloseq)
library(qiime2R)
#create phyloseq object
ps <- qza_to_phyloseq(
features = "results/02.QIIME2/merged-table-filters-freq52-97.qza",
tree = "results/02.QIIME2/rooted-tree-iqtree.qza",
taxonomy = "results/02.QIIME2/taxonomy_merge.qza",
metadata = "data/metadata.tsv")library(phyloseq)
library(dplyr)
# Filter samples according previous filters
# Extract metadata
meta_data <- as.data.frame(sample_data(ps))
# Filter
filtered_meta_data <- meta_data %>%
group_by(Specimen) %>%
mutate(n_compartments = n_distinct(Source)) %>%
filter(n_compartments >= 2) %>%
ungroup()
# Empty check point
if(nrow(filtered_meta_data) == 0) {
stop("No samples meet the criteria")
}
# get samples
sample_ids_to_keep <- filtered_meta_data$sample
# Sample of interest check point
if(length(sample_ids_to_keep) == 0) {
stop("No valid sample IDs were found. Check your metadata processing steps.")
}
# Prune samples from the phyloseq object
ps2 <- prune_samples(sample_ids_to_keep, ps)
# show
print(ps)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6261 taxa and 83 samples ]
## sample_data() Sample Data: [ 83 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]
print(ps2)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6261 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]
library(devtools)## Loading required package: usethis
library(microbiome)##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(microbiomeutilities)## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
##
## Attaching package: 'microbiomeutilities'
## The following object is masked from 'package:microbiome':
##
## add_refseq
#check data
microbiome::summarize_phyloseq(ps2)## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 1738693] Total number of reads = 47393804] Average number of reads = 65824.72222222225] Median number of reads = 59703.57] Sparsity = 0.7991002502262686] Any OTU sum to 1 or less? YES8] Number of singletons = 1009] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
##
## [[2]]
## [1] "2] Max. number of reads = 173869"
##
## [[3]]
## [1] "3] Total number of reads = 4739380"
##
## [[4]]
## [1] "4] Average number of reads = 65824.7222222222"
##
## [[5]]
## [1] "5] Median number of reads = 59703.5"
##
## [[6]]
## [1] "7] Sparsity = 0.799100250226268"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 100"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Remove singletons
#Delete singletons
ps3 <- filter_taxa(ps2, function(x) sum(x) > 1, TRUE)Remove Phylum with low prevalence
# Explore prevalence
## Get prevalence
prevdf = apply(X = otu_table(ps3),
MARGIN = ifelse(taxa_are_rows(ps3), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
## Add taxonomy
prevdf = data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(ps3),
tax_table(ps3))
## Check prevalence at Phylum level
plyr::ddply(prevdf, "Phylum", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})plyr::ddply(prevdf, "Genus", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})# tax to filter
filtertaxa = c("Elusimicrobiota", "GAL15")
# filter
ps4 = subset_taxa(ps3, !Phylum %in% filtertaxa)Clean names
#check names
#head(microbiome::abundances(ps_fltr))
head(phyloseq::tax_table(ps4))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_1034 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## Family Genus Species
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" "uncultured_bacterium"
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA
#Next, cleaning the "d__" and similar values.
# extending gsub to entire table
tax_table(ps4)[, colnames(tax_table(ps4))] <- gsub(tax_table(ps4)[, colnames(tax_table(ps4))], pattern = "[a-z]__", replacement = "")
#change ambiguities
tax_table(ps4)[tax_table(ps4) == "uncultured_bacterium"] <- NA
tax_table(ps4)[tax_table(ps4) == "uncultured"] <- NA
tax_table(ps4)[tax_table(ps4) == "metagenome"] <- NA #check
head(phyloseq::tax_table(ps4))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_1034 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## Family Genus Species
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA
Agglomerate levels
#Aggregate at genus level.
ps_family <- phyloseq::tax_glom(ps4, "Family", NArm = TRUE)
ps_genus <- phyloseq::tax_glom(ps4, "Genus", NArm = TRUE)
ps_species <- phyloseq::tax_glom(ps4, "Species", NArm = TRUE)ps_genus <- filter_taxa(ps_genus, function(x) sum(x) > 1, TRUE)
head(phyloseq::tax_table(ps_genus))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_519 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_5638 "Bacteria" "Planctomycetota" "Phycisphaerae" "Phycisphaerales"
## ASV_4626 "Bacteria" "Planctomycetota" "Phycisphaerae" "mle1-8"
## ASV_4159 "Bacteria" "Planctomycetota" "OM190" "OM190"
## ASV_498 "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"
## ASV_765 "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"
## Family Genus Species
## ASV_519 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_5638 "Phycisphaeraceae" "AKYG587" NA
## ASV_4626 "mle1-8" "mle1-8" NA
## ASV_4159 "OM190" "OM190" NA
## ASV_498 "Pirellulaceae" "Pir4_lineage" NA
## ASV_765 "Pirellulaceae" "Pirellula" NA
Change id by tax name
#Substitute these IDs with names of genus
#ps_genus <- format_to_besthit(ps_genus) #no me gusta el formato, pero sirve
taxa_names(ps_genus) <- tax_table(ps_genus)[,"Genus"]
head(taxa_names(ps_genus))## [1] "WD2101_soil_group" "AKYG587" "mle1-8"
## [4] "OM190" "Pir4_lineage" "Pirellula"
head(phyloseq::tax_table(ps_genus))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class
## WD2101_soil_group "Bacteria" "Planctomycetota" "Phycisphaerae"
## AKYG587 "Bacteria" "Planctomycetota" "Phycisphaerae"
## mle1-8 "Bacteria" "Planctomycetota" "Phycisphaerae"
## OM190 "Bacteria" "Planctomycetota" "OM190"
## Pir4_lineage "Bacteria" "Planctomycetota" "Planctomycetes"
## Pirellula "Bacteria" "Planctomycetota" "Planctomycetes"
## Order Family Genus
## WD2101_soil_group "Tepidisphaerales" "WD2101_soil_group" "WD2101_soil_group"
## AKYG587 "Phycisphaerales" "Phycisphaeraceae" "AKYG587"
## mle1-8 "mle1-8" "mle1-8" "mle1-8"
## OM190 "OM190" "OM190" "OM190"
## Pir4_lineage "Pirellulales" "Pirellulaceae" "Pir4_lineage"
## Pirellula "Pirellulales" "Pirellulaceae" "Pirellula"
## Species
## WD2101_soil_group NA
## AKYG587 NA
## mle1-8 NA
## OM190 NA
## Pir4_lineage NA
## Pirellula NA
Clean rare characters
# Remove characters
taxa_names(ps_genus) <- gsub(" ", ".", taxa_names(ps_genus))
taxa_names(ps_genus) <- gsub("\\(|\\)", "", taxa_names(ps_genus))
#check
unique(tax_table(ps_genus)[,"Genus"] )## Taxonomy Table: [418 taxa by 1 taxonomic ranks]:
## Genus
## WD2101_soil_group "WD2101_soil_group"
## AKYG587 "AKYG587"
## mle1-8 "mle1-8"
## OM190 "OM190"
## Pir4_lineage "Pir4_lineage"
## Pirellula "Pirellula"
## SH-PL14 "SH-PL14"
## Planctomicrobium "Planctomicrobium"
## Schlesneria "Schlesneria"
## Gemmata "Gemmata"
## Fimbriiglobus "Fimbriiglobus"
## Zavarzinella "Zavarzinella"
## Gemmataceae "Gemmataceae"
## Aquisphaera "Aquisphaera"
## Singulisphaera "Singulisphaera"
## Tundrisphaera "Tundrisphaera"
## Latescibacterota "Latescibacterota"
## Latescibacteraceae "Latescibacteraceae"
## Terrimicrobium "Terrimicrobium"
## Candidatus_Udaeobacter "Candidatus_Udaeobacter"
## Candidatus_Xiphinematobacter "Candidatus_Xiphinematobacter"
## Chthoniobacter "Chthoniobacter"
## Pedosphaeraceae "Pedosphaeraceae"
## ADurb.Bin063-1 "ADurb.Bin063-1"
## DEV008 "DEV008"
## Ellin517 "Ellin517"
## Pedosphaera "Pedosphaera"
## Ellin516 "Ellin516"
## Candidatus_Kaiserbacteria "Candidatus_Kaiserbacteria"
## Opitutus "Opitutus"
## Lacunisphaera "Lacunisphaera"
## Cerasicoccus "Cerasicoccus"
## Subgroup_17 "Subgroup_17"
## Subgroup_25 "Subgroup_25"
## Subgroup_9 "Subgroup_9"
## Vicinamibacteraceae "Vicinamibacteraceae"
## Luteitalea "Luteitalea"
## Vicinamibacter "Vicinamibacter"
## Edaphobacter "Edaphobacter"
## Terriglobus "Terriglobus"
## Granulicella "Granulicella"
## Acidipila "Acidipila"
## Acidicapsa "Acidicapsa"
## Occallatibacter "Occallatibacter"
## Terracidiphilus "Terracidiphilus"
## Candidatus_Koribacter "Candidatus_Koribacter"
## Subgroup_13 "Subgroup_13"
## Subgroup_5 "Subgroup_5"
## Subgroup_11 "Subgroup_11"
## Neochlamydia "Neochlamydia"
## Candidatus_Protochlamydia "Candidatus_Protochlamydia"
## cvE6 "cvE6"
## Pla4_lineage "Pla4_lineage"
## Ilumatobacter "Ilumatobacter"
## CL500-29_marine_group "CL500-29_marine_group"
## Iamia "Iamia"
## IMCC26256 "IMCC26256"
## 0319-7L14 "0319-7L14"
## Actinomadura "Actinomadura"
## Actinoallomurus "Actinoallomurus"
## Acidothermus "Acidothermus"
## Actinocorallia "Actinocorallia"
## Acrocarpospora "Acrocarpospora"
## Planomonospora "Planomonospora"
## Sphaerisporangium "Sphaerisporangium"
## Streptosporangiaceae "Streptosporangiaceae"
## Nonomuraea "Nonomuraea"
## Streptosporangium "Streptosporangium"
## Streptomyces "Streptomyces"
## Streptacidiphilus "Streptacidiphilus"
## Kitasatospora "Kitasatospora"
## Jiangella "Jiangella"
## Aeromicrobium "Aeromicrobium"
## Marmoricola "Marmoricola"
## Nocardioides "Nocardioides"
## Microlunatus "Microlunatus"
## Cutibacterium "Cutibacterium"
## Actinopolymorpha "Actinopolymorpha"
## Flindersiella "Flindersiella"
## Kribbella "Kribbella"
## Tenggerimyces "Tenggerimyces"
## Actinospica "Actinospica"
## Catenulispora "Catenulispora"
## Lechevalieria "Lechevalieria"
## Saccharothrix "Saccharothrix"
## Amycolatopsis "Amycolatopsis"
## Actinophytocola "Actinophytocola"
## Kutzneria "Kutzneria"
## Actinomycetospora "Actinomycetospora"
## Pseudonocardia "Pseudonocardia"
## Kibdelosporangium "Kibdelosporangium"
## Hamadaea "Hamadaea"
## Rugosimonospora "Rugosimonospora"
## Catellatospora "Catellatospora"
## Actinoplanes "Actinoplanes"
## Rhizocola "Rhizocola"
## Allocatelliglobosispora "Allocatelliglobosispora"
## Longispora "Longispora"
## Virgisporangium "Virgisporangium"
## Dactylosporangium "Dactylosporangium"
## Krasilnikovia "Krasilnikovia"
## Luedemannella "Luedemannella"
## Micromonospora "Micromonospora"
## Asanoa "Asanoa"
## Mycobacterium "Mycobacterium"
## Corynebacterium "Corynebacterium"
## Nocardia "Nocardia"
## Rhodococcus "Rhodococcus"
## Jatrophihabitans "Jatrophihabitans"
## Cryptosporangium "Cryptosporangium"
## Frankia "Frankia"
## Crossiella "Crossiella"
## Saccharopolyspora "Saccharopolyspora"
## Frankiales "Frankiales"
## Geodermatophilus "Geodermatophilus"
## Blastococcus "Blastococcus"
## Modestobacter "Modestobacter"
## Nakamurella "Nakamurella"
## Intrasporangium "Intrasporangium"
## Pseudarthrobacter "Pseudarthrobacter"
## Paenarthrobacter "Paenarthrobacter"
## Agromyces "Agromyces"
## Leifsonia "Leifsonia"
## Microbacterium "Microbacterium"
## Curtobacterium "Curtobacterium"
## Cellulomonas "Cellulomonas"
## Isoptericola "Isoptericola"
## Sporichthya "Sporichthya"
## 67-14 "67-14"
## Conexibacter "Conexibacter"
## Solirubrobacter "Solirubrobacter"
## Parviterribacter "Parviterribacter"
## Solirubrobacteraceae "Solirubrobacteraceae"
## Gaiella "Gaiella"
## MB-A2-108 "MB-A2-108"
## Rubrobacter "Rubrobacter"
## Gitt-GS-136 "Gitt-GS-136"
## KD4-96 "KD4-96"
## Thermosporothrix "Thermosporothrix"
## 1959-1 "1959-1"
## Ktedonobacter "Ktedonobacter"
## Ktedonobacterales "Ktedonobacterales"
## JG30-KF-AS9 "JG30-KF-AS9"
## JG30-KF-CM45 "JG30-KF-CM45"
## AKYG1722 "AKYG1722"
## FFCH7168 "FFCH7168"
## AKIW781 "AKIW781"
## C0119 "C0119"
## S085 "S085"
## OLB14 "OLB14"
## JG30-KF-CM66 "JG30-KF-CM66"
## SAR202_clade "SAR202_clade"
## TK10 "TK10"
## Litorilinea "Litorilinea"
## A4b "A4b"
## OLB13 "OLB13"
## UTCFX1 "UTCFX1"
## RBG-13-54-9 "RBG-13-54-9"
## SBR1031 "SBR1031"
## Domibacillus "Domibacillus"
## Brevibacillus "Brevibacillus"
## Geobacillus "Geobacillus"
## Lysinibacillus "Lysinibacillus"
## Kurthia "Kurthia"
## Paenisporosarcina "Paenisporosarcina"
## Staphylococcus "Staphylococcus"
## Tepidibacillus "Tepidibacillus"
## Fictibacillus "Fictibacillus"
## Bacillus "Bacillus"
## Terribacillus "Terribacillus"
## Lactococcus "Lactococcus"
## Ammoniphilus "Ammoniphilus"
## Paenibacillus "Paenibacillus"
## Cohnella "Cohnella"
## Baia "Baia"
## Tumebacillus "Tumebacillus"
## AD3 "AD3"
## Sporomusa "Sporomusa"
## Anaerospora "Anaerospora"
## Obscuribacteraceae "Obscuribacteraceae"
## Oxynema_BDU_92071 "Oxynema_BDU_92071"
## WPS-2 "WPS-2"
## Bryobacter "Bryobacter"
## Candidatus_Solibacter "Candidatus_Solibacter"
## Paraclostridium "Paraclostridium"
## Romboutsia "Romboutsia"
## Clostridium_sensu_stricto_8 "Clostridium_sensu_stricto_8"
## Clostridium_sensu_stricto_12 "Clostridium_sensu_stricto_12"
## Clostridium_sensu_stricto_9 "Clostridium_sensu_stricto_9"
## Clostridium_sensu_stricto_3 "Clostridium_sensu_stricto_3"
## Clostridium_sensu_stricto_13 "Clostridium_sensu_stricto_13"
## Herbinix "Herbinix"
## Anaerocolumna "Anaerocolumna"
## Saccharimonadales "Saccharimonadales"
## TM7a "TM7a"
## LWQ8 "LWQ8"
## TM7 "TM7"
## Fimbriimonadaceae "Fimbriimonadaceae"
## Armatimonadales "Armatimonadales"
## Kapabacteriales "Kapabacteriales"
## Ruminiclostridium "Ruminiclostridium"
## Anaerobacterium "Anaerobacterium"
## RB41 "RB41"
## JGI_0001001-H03 "JGI_0001001-H03"
## Blastocatella "Blastocatella"
## Aridibacter "Aridibacter"
## Stenotrophobacter "Stenotrophobacter"
## 11-24 "11-24"
## DS-100 "DS-100"
## Subgroup_2 "Subgroup_2"
## Rokubacteriales "Rokubacteriales"
## WX65 "WX65"
## Subgroup_7 "Subgroup_7"
## Vermiphilaceae "Vermiphilaceae"
## Subgroup_10 "Subgroup_10"
## Subgroup_22 "Subgroup_22"
## Subgroup_18 "Subgroup_18"
## Entotheonellaceae "Entotheonellaceae"
## Candidatus_Entotheonella "Candidatus_Entotheonella"
## Gemmatimonas "Gemmatimonas"
## Roseisolibacter "Roseisolibacter"
## S0134_terrestrial_group "S0134_terrestrial_group"
## BD2-11_terrestrial_group "BD2-11_terrestrial_group"
## YC-ZSS-LKJ147 "YC-ZSS-LKJ147"
## Longimicrobiaceae "Longimicrobiaceae"
## Minicystis "Minicystis"
## Polyangium "Polyangium"
## Eel-36e1D6 "Eel-36e1D6"
## BIrii41 "BIrii41"
## Phaselicystis "Phaselicystis"
## mle1-27 "mle1-27"
## Pajaroellobacter "Pajaroellobacter"
## Haliangium "Haliangium"
## Pseudenhygromyxa "Pseudenhygromyxa"
## Nannocystis "Nannocystis"
## Myxococcus "Myxococcus"
## Archangium "Archangium"
## P3OB-42 "P3OB-42"
## Anaeromyxobacter "Anaeromyxobacter"
## Blfdi19 "Blfdi19"
## NB1-j "NB1-j"
## OM27_clade "OM27_clade"
## Geobacter "Geobacter"
## Bdellovibrio "Bdellovibrio"
## MBNT15 "MBNT15"
## bacteriap25 "bacteriap25"
## RCP2-54 "RCP2-54"
## Holophaga "Holophaga"
## Babeliales "Babeliales"
## Nitrospira "Nitrospira"
## 4-29-1 "4-29-1"
## PMMR1 "PMMR1"
## Asticcacaulis "Asticcacaulis"
## Caulobacter "Caulobacter"
## Phenylobacterium "Phenylobacterium"
## A0839 "A0839"
## Nordella "Nordella"
## Rhodomicrobium "Rhodomicrobium"
## Pedomicrobium "Pedomicrobium"
## Hyphomicrobium "Hyphomicrobium"
## Labrys "Labrys"
## Phreatobacter "Phreatobacter"
## Amphiplicatus "Amphiplicatus"
## Bosea "Bosea"
## Neo-b11 "Neo-b11"
## Methylobacterium-Methylorubrum "Methylobacterium-Methylorubrum"
## Microvirga "Microvirga"
## FFCH5858 "FFCH5858"
## Xanthobacteraceae "Xanthobacteraceae"
## Bauldia "Bauldia"
## alphaI_cluster "alphaI_cluster"
## Methylovirgula "Methylovirgula"
## 28-YEA-48 "28-YEA-48"
## D05-2 "D05-2"
## Rubellimicrobium "Rubellimicrobium"
## Amaricoccus "Amaricoccus"
## Kaistia "Kaistia"
## Hirschia "Hirschia"
## SWB02 "SWB02"
## Roseiarcus "Roseiarcus"
## Amb-16S-1323 "Amb-16S-1323"
## Pseudorhodoplanes "Pseudorhodoplanes"
## Pseudolabrys "Pseudolabrys"
## Bradyrhizobium "Bradyrhizobium"
## Rhodoplanes "Rhodoplanes"
## Starkeya "Starkeya"
## Devosia "Devosia"
## Methyloligellaceae "Methyloligellaceae"
## Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"
## Mesorhizobium "Mesorhizobium"
## Ensifer "Ensifer"
## Ochrobactrum "Ochrobactrum"
## Phyllobacterium "Phyllobacterium"
## Aureimonas "Aureimonas"
## KF-JG30-B3 "KF-JG30-B3"
## Telmatospirillum "Telmatospirillum"
## Sphingomonas "Sphingomonas"
## Ellin6055 "Ellin6055"
## Novosphingobium "Novosphingobium"
## Altererythrobacter "Altererythrobacter"
## Qipengyuania "Qipengyuania"
## Sphingobium "Sphingobium"
## Haematospirillum "Haematospirillum"
## Aliidongia "Aliidongia"
## Reyranella "Reyranella"
## Candidatus_Paracaedibacter "Candidatus_Paracaedibacter"
## Dongia "Dongia"
## Acidisoma "Acidisoma"
## Craurococcus-Caldovatus "Craurococcus-Caldovatus"
## Rhodovastum "Rhodovastum"
## Rhodopila "Rhodopila"
## Acidocella "Acidocella"
## Inquilinus "Inquilinus"
## Azospirillum "Azospirillum"
## Skermanella "Skermanella"
## Constrictibacter "Constrictibacter"
## Candidatus_Alysiosphaera "Candidatus_Alysiosphaera"
## URHD0088 "URHD0088"
## Steroidobacter "Steroidobacter"
## R7C24 "R7C24"
## JG36-TzT-191 "JG36-TzT-191"
## Acidibacter "Acidibacter"
## WD260 "WD260"
## KF-JG30-C25 "KF-JG30-C25"
## PLTA13 "PLTA13"
## CCD24 "CCD24"
## Legionella "Legionella"
## Candidatus_Ovatusbacter "Candidatus_Ovatusbacter"
## JTB255_marine_benthic_group "JTB255_marine_benthic_group"
## Pantoea "Pantoea"
## Escherichia-Shigella "Escherichia-Shigella"
## Serratia "Serratia"
## Enterobacter "Enterobacter"
## Klebsiella "Klebsiella"
## KI89A_clade "KI89A_clade"
## Gammaproteobacteria "Gammaproteobacteria"
## Pseudomonas "Pseudomonas"
## OM60NOR5_clade "OM60(NOR5)_clade"
## Enhydrobacter "Enhydrobacter"
## Acinetobacter "Acinetobacter"
## Alkanindiges "Alkanindiges"
## Cavicella "Cavicella"
## Aquicella "Aquicella"
## Dinghuibacter "Dinghuibacter"
## Chitinophaga "Chitinophaga"
## Flavihumibacter "Flavihumibacter"
## Parafilimonas "Parafilimonas"
## Filimonas "Filimonas"
## Terrimonas "Terrimonas"
## Ferruginibacter "Ferruginibacter"
## UTBCD1 "UTBCD1"
## Puia "Puia"
## Flavitalea "Flavitalea"
## Niastella "Niastella"
## Flavisolibacter "Flavisolibacter"
## Edaphobaculum "Edaphobaculum"
## Taibaiella "Taibaiella"
## Sphingobacterium "Sphingobacterium"
## Olivibacter "Olivibacter"
## Mucilaginibacter "Mucilaginibacter"
## Solitalea "Solitalea"
## env.OPS_17 "env.OPS_17"
## AKYH767 "AKYH767"
## NS11-12_marine_group "NS11-12_marine_group"
## CWT_CU03-E12 "CWT_CU03-E12"
## Bacteroidetes_VC2.1_Bac22 "Bacteroidetes_VC2.1_Bac22"
## Chryseobacterium "Chryseobacterium"
## Cloacibacterium "Cloacibacterium"
## Flavobacterium "Flavobacterium"
## Candidatus_Brownia "Candidatus_Brownia"
## Rhodocytophaga "Rhodocytophaga"
## Dyadobacter "Dyadobacter"
## Chryseolinea "Chryseolinea"
## Ohtaekwangia "Ohtaekwangia"
## OLB12 "OLB12"
## Adhaeribacter "Adhaeribacter"
## Nevskia "Nevskia"
## Pseudoxanthomonas "Pseudoxanthomonas"
## Stenotrophomonas "Stenotrophomonas"
## Xanthomonas "Xanthomonas"
## Fulvimonas "Fulvimonas"
## Rudaea "Rudaea"
## Dyella "Dyella"
## Luteibacter "Luteibacter"
## Rhodanobacter "Rhodanobacter"
## Arenimonas "Arenimonas"
## Lysobacter "Lysobacter"
## Massilia "Massilia"
## Silvimonas "Silvimonas"
## Piscinibacter "Piscinibacter"
## Rhizobacter "Rhizobacter"
## Variovorax "Variovorax"
## Caenimonas "Caenimonas"
## Curvibacter "Curvibacter"
## Ramlibacter "Ramlibacter"
## Azohydromonas "Azohydromonas"
## Paucibacter "Paucibacter"
## Pelomonas "Pelomonas"
## Roseateles "Roseateles"
## Comamonas "Comamonas"
## Pandoraea "Pandoraea"
## Bordetella "Bordetella"
## Achromobacter "Achromobacter"
## Noviherbaspirillum "Noviherbaspirillum"
## Cupriavidus "Cupriavidus"
## Herbaspirillum "Herbaspirillum"
## mle1-7 "mle1-7"
## TRA3-20 "TRA3-20"
## GOUTA6 "GOUTA6"
## MND1 "MND1"
## IS-44 "IS-44"
## A21b "A21b"
## SC-I-84 "SC-I-84"
## Nitrosospira "Nitrosospira"
## B1-7BS "B1-7BS"
## Niveibacterium "Niveibacterium"
## Ellin6067 "Ellin6067"
## Burkholderia-Caballeronia-Paraburkholderia "Burkholderia-Caballeronia-Paraburkholderia"
# check data
table(meta(ps4)$Location, meta(ps4)$Source)##
## Rhizhomorph Soil associated Stipe
## El Castillo/Zentla 8 9 9
## Monte obscuro Emiliano Zapata 2 7 7
## Naolinco 10 10 10
#subsets
ps_ElCastillo <- subset_samples(ps4, Location == "El Castillo/Zentla")
ps_MonteObscuro <- subset_samples(ps4, Location == "Monte obscuro Emiliano Zapata")
ps_Naolinco <- subset_samples(ps4, Location == "Naolinco")
ps_ElCastillo## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_MonteObscuro## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_Naolinco## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 30 samples ]
## sample_data() Sample Data: [ 30 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
#check data
microbiome::summarize_phyloseq(ps_ElCastillo)## Compositional = NO2
## 1] Min. number of reads = 322442] Max. number of reads = 676833] Total number of reads = 13117814] Average number of reads = 50453.11538461545] Median number of reads = 516897] Sparsity = 0.82902444202986] Any OTU sum to 1 or less? YES8] Number of singletons = 28859] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 32244"
##
## [[2]]
## [1] "2] Max. number of reads = 67683"
##
## [[3]]
## [1] "3] Total number of reads = 1311781"
##
## [[4]]
## [1] "4] Average number of reads = 50453.1153846154"
##
## [[5]]
## [1] "5] Median number of reads = 51689"
##
## [[6]]
## [1] "7] Sparsity = 0.8290244420298"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 2885"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_MonteObscuro)## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 682953] Total number of reads = 7181394] Average number of reads = 44883.68755] Median number of reads = 414317] Sparsity = 0.7536531904529966] Any OTU sum to 1 or less? YES8] Number of singletons = 24069] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
##
## [[2]]
## [1] "2] Max. number of reads = 68295"
##
## [[3]]
## [1] "3] Total number of reads = 718139"
##
## [[4]]
## [1] "4] Average number of reads = 44883.6875"
##
## [[5]]
## [1] "5] Median number of reads = 41431"
##
## [[6]]
## [1] "7] Sparsity = 0.753653190452996"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 2406"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_Naolinco)## Compositional = NO2
## 1] Min. number of reads = 442602] Max. number of reads = 1738693] Total number of reads = 27092974] Average number of reads = 90309.95] Median number of reads = 874747] Sparsity = 0.7894625750933596] Any OTU sum to 1 or less? YES8] Number of singletons = 32619] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 44260"
##
## [[2]]
## [1] "2] Max. number of reads = 173869"
##
## [[3]]
## [1] "3] Total number of reads = 2709297"
##
## [[4]]
## [1] "4] Average number of reads = 90309.9"
##
## [[5]]
## [1] "5] Median number of reads = 87474"
##
## [[6]]
## [1] "7] Sparsity = 0.789462575093359"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 3261"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Remove singletons
#Delete singletons
ps_ElCastillo <- filter_taxa(ps_ElCastillo, function(x) sum(x) > 1, TRUE)
ps_MonteObscuro <- filter_taxa(ps_MonteObscuro, function(x) sum(x) > 1, TRUE)
ps_Naolinco <- filter_taxa(ps_Naolinco, function(x) sum(x) > 1, TRUE)
#check
ps_ElCastillo## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3274 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 3274 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3274 tips and 3273 internal nodes ]
ps_MonteObscuro## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3753 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 3753 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3753 tips and 3752 internal nodes ]
ps_Naolinco## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2898 taxa and 30 samples ]
## sample_data() Sample Data: [ 30 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 2898 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2898 tips and 2897 internal nodes ]
# Check
table(meta(ps_ElCastillo_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 8 9 9
table(meta(ps_MonteObscuro_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 2 7 7
table(meta(ps_Naolinco_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 10 10 10
library(microbiome)
library(microbiomeutilities)
#function
convert_to_compositional_and_index_reformat <- function(ps) {
ps_rel <- microbiome::transform(ps, "compositional")
ps_rel_f <- format_to_besthit(ps_rel)
return(ps_rel_f)
}
# phyloseq objects
phyloseq_objects <- list(
ps_ElCastillo = ps_ElCastillo,
ps_MonteObscuro = ps_MonteObscuro,
ps_Naolinco = ps_Naolinco,
ps4 = ps4
)
# Apply
phyloseq_objects_rel <- lapply(phyloseq_objects, convert_to_compositional_and_index_reformat)
# Subset
ps_ElCastillo_rel_f <- phyloseq_objects_rel$ps_ElCastillo
ps_MonteObscuro_rel_f <- phyloseq_objects_rel$ps_MonteObscuro
ps_Naolinco_rel_f <- phyloseq_objects_rel$ps_Naolinco
ps_full_rel_f <- phyloseq_objects_rel$ps4# Get full core with microbiome
full_ps_core_relative <- core(ps_full_rel_f, 0, 0.5)
full_ps_core_counts <- core(ps4, 0, 0.5)
# Collapse the rare taxa into an “Other” category
#full_ps_core_fltOthers <- aggregate_rare(ps4_rel, "Genus", detection = 0, prevalence = 0.5)
# Compare phyloseq objects
ps_full_rel_f## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps4## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
full_ps_core_relative## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 365 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 365 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
full_ps_core_counts## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 365 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 365 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
microbiome::summarize_phyloseq(full_ps_core_counts)## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
##
## [[2]]
## [1] "2] Max. number of reads = 132023"
##
## [[3]]
## [1] "3] Total number of reads = 2296141"
##
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
##
## [[5]]
## [1] "5] Median number of reads = 26767.5"
##
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Create full, core and location ampvis objects
library(tibble)
library(ampvis2)
# Ampvis object function
create_ampvis_object <- function(pseq) {
# create and extract otu table
otu_table_ampvis <- data.frame(OTU = rownames(phyloseq::otu_table(pseq)@.Data),
phyloseq::otu_table(pseq)@.Data,
phyloseq::tax_table(pseq)@.Data,
check.names = FALSE)
# Metadata
meta_data_ampvis <- data.frame(phyloseq::sample_data(pseq),
check.names = FALSE)
# change index by SampleID
meta_data_ampvis <- meta_data_ampvis %>% rownames_to_column(var = "SampleID")
# ampvis object
av2 <- amp_load(otu_table_ampvis, meta_data_ampvis)
return(av2)
}# phyloseq objects
phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)
# Apply function
ampvis_objects <- lapply(phyloseq_objects, create_ampvis_object)
ampvis_objects## $ps4
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 72 6159 4739217 23890 173869 59703.5
## Avg#Reads
## 65822.46
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 6159(100%) 6159(100%) 6128(99.5%) 6058(98.36%) 5483(89.02%) 4442(72.12%)
## Species
## 1182(19.19%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_ElCastillo
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 26 3274 1311781 32244 67683 51689
## Avg#Reads
## 50453.12
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 3274(100%) 3274(100%) 3256(99.45%) 3216(98.23%) 2907(88.79%) 2311(70.59%)
## Species
## 705(21.53%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_MonteObscuro
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 16 3753 718139 23890 68295 41431
## Avg#Reads
## 44883.69
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 3753(100%) 3753(100%) 3734(99.49%) 3692(98.37%) 3393(90.41%) 2815(75.01%)
## Species
## 615(16.39%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_Naolinco
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 30 2898 2709297 44260 173869 87474
## Avg#Reads
## 90309.9
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 2898(100%) 2898(100%) 2887(99.62%) 2848(98.27%) 2570(88.68%) 2093(72.22%)
## Species
## 570(19.67%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $full_ps_core_counts
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 72 365 2296141 3289 132023 26767.5
## Avg#Reads
## 31890.85
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 365(100%) 365(100%) 363(99.45%) 358(98.08%) 331(90.68%) 262(71.78%)
## Species
## 80(21.92%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
#subset amp obj
phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)
av2_full <- ampvis_objects$ps4
av2_core <- ampvis_objects$full_ps_core_counts
av2_ElCastillo <- ampvis_objects$ps_ElCastillo
av2_MonteObscuro <-ampvis_objects$ps_MonteObscuro
av2_Naolinco <- ampvis_objects$ps_Naolinco# Full
core_source_av2_full <- amp_venn(av2_full, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_full <- core_source_av2_full$Otutable
#save core compartments otu table
write.table(core_otu_table_av2_full, "results/03.Diversity/Core_compartments_otu_table_ampvis_full.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)# El Castillo
core_source_av2_ElCastillo <- amp_venn(av2_ElCastillo, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_ElCastillo <- core_source_av2_ElCastillo$Otutable
write.table(core_otu_table_av2_ElCastillo, "results/03.Diversity/Core_compartments_otu_table_ampvis_ElCastillo.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# Monte Obscuro
core_source_av2_MonteObscuro <- amp_venn(av2_MonteObscuro, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_MonteObscuro <- core_source_av2_MonteObscuro$Otutable
write.table(core_otu_table_av2_MonteObscuro, "results/03.Diversity/Core_compartments_otu_table_ampvis_MonteObscuro.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# Naolinco
core_source_av2_Naolinco <- amp_venn(av2_Naolinco, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_Naolinco <- core_source_av2_Naolinco$Otutable
write.table(core_otu_table_av2_Naolinco, "results/03.Diversity/Core_compartments_otu_table_ampvis_Naolinco.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)Based here
Calculate core between Compartments
# count number of samples in each group
table(meta(ps4)$Source, useNA = "always")##
## Rhizhomorph Soil associated Stipe <NA>
## 20 26 26 0
#List of compartments
compartments <- unique(as.character(meta(ps_full_rel_f)$Source))
print(compartments)## [1] "Stipe" "Soil associated" "Rhizhomorph"
#Write a for loop to go through each of the compartments one by one and combine identified core taxa into a list.
list_core_full <- list()# an empty object to store information
for (compartment in compartments){
#print(paste0("Identifying Core Taxa for ", compartment))
ps_sub_full <- subset_samples(ps_full_rel_f, Source == compartment)
core_m_full <- core_members(ps_sub_full,
detection = 0, # 100 % samples
prevalence = 0.5) # 50% prevalence
print(paste0("No. of core taxa in ", compartment, " : ", length(core_m_full)))
# add to a list core taxa for each group.
list_core_full[[compartment]] <- core_m_full
#create core compartment variable
variable_name_full <- paste0("core_full_", gsub(" ", "_", compartment))
assign(variable_name_full, core_m_full, envir = .GlobalEnv)
#write all cores
file_name_full <- paste0("results/03.Diversity/",variable_name,".csv")
write.csv(core_m_full, file = file_name_full, row.names = FALSE)
}## [1] "No. of core taxa in Stipe : 135"
## [1] "No. of core taxa in Soil associated : 902"
## [1] "No. of core taxa in Rhizhomorph : 536"
Plot venn diagram
library(eulerr)
# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")
# Reorder
list_core_ordered <- list_core_full[desired_order]
# Circular venn
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")
plot_venn_venn <- plot(venn(list_core_ordered), fills = venn_colors, alpha = 0.9)
plot_venn_venn# Real size venn
euler_venn <- euler(list_core_ordered, shape = "ellipse")
plot_euler_venn <- plot(euler_venn, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_euler_vennGet core identities
Define colors and order
# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")El Castillo
# El Castillo
compartmentsEC <- unique(as.character(meta(ps_ElCastillo_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaEC <- list()
for (compartment in compartmentsEC) {
ps_sub <- subset_samples(ps_ElCastillo_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaEC[[compartment]] <- core_m
cat("Compartimiento:", compartment, "\n")
cat("Número de core taxa:", length(core_m), "\n")
cat("Algunos core taxa:", head(core_m), "\n")
}## Compartimiento: Stipe
## Número de core taxa: 310
## Algunos core taxa: ASV_1894:f__Pirellulaceae ASV_1226:uncultured_Planctomyces ASV_1830:uncultured_Planctomyces ASV_763:uncultured_Planctomyces ASV_3514:uncultured_Planctomyces ASV_2226:uncultured_Planctomycetaceae
## Compartimiento: Rhizhomorph
## Número de core taxa: 582
## Algunos core taxa: ASV_3775:g__WD2101_soil_group ASV_2220:g__WD2101_soil_group ASV_1924:uncultured_Pasteuria ASV_765:uncultured_Pasteuria ASV_2059:f__Pirellulaceae ASV_1226:uncultured_Planctomyces
## Compartimiento: Soil associated
## Número de core taxa: 1239
## Algunos core taxa: ASV_5678:g__WD2101_soil_group ASV_3775:g__WD2101_soil_group ASV_3038:g__WD2101_soil_group ASV_5668:Planctomycetales_bacterium ASV_2220:g__WD2101_soil_group ASV_2821:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_EC <- list_core_taxaEC[desired_order]
# Circular venn
plot_venn_venn_EC <- plot(venn(list_core_ordered_EC), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_EC <- euler(list_core_ordered_EC, shape = "ellipse")
plot_euler_venn_EC <- plot(euler_venn_EC, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_ECplot_euler_venn_ECMonte Obscuro
# El Castillo
compartmentsMO <- unique(as.character(meta(ps_MonteObscuro_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaMO <- list()
for (compartment in compartmentsMO) {
ps_sub <- subset_samples(ps_MonteObscuro_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaMO[[compartment]] <- core_m
cat("Compartment:", compartment, "\n")
cat("Number of core taxa:", length(core_m), "\n")
cat("Some core taxa:", head(core_m), "\n")
}## Compartment: Stipe
## Number of core taxa: 374
## Some core taxa: ASV_1035:uncultured_Planctomycetaceae ASV_1218:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage ASV_4143:uncultured_soil ASV_2265:f__Pirellulaceae
## Compartment: Soil associated
## Number of core taxa: 2170
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_1428:g__WD2101_soil_group ASV_4955:g__WD2101_soil_group ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group
## Compartment: Rhizhomorph
## Number of core taxa: 1285
## Some core taxa: ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group ASV_779:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_MO <- list_core_taxaMO[desired_order]
# Circular venn
plot_venn_venn_MO <- plot(venn(list_core_ordered_MO), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_MO <- euler(list_core_ordered_MO, shape = "ellipse")
plot_euler_venn_MO <- plot(euler_venn_MO, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_MOplot_euler_venn_MONaolinco
# El Castillo
compartmentsN <- unique(as.character(meta(ps_Naolinco_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaN <- list()
for (compartment in compartmentsN) {
ps_sub <- subset_samples(ps_Naolinco_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaN[[compartment]] <- core_m
cat("Compartment:", compartment, "\n")
cat("Number of core taxa:", length(core_m), "\n")
cat("Some core taxa:", head(core_m), "\n")
}## Compartment: Stipe
## Number of core taxa: 378
## Some core taxa: ASV_779:g__WD2101_soil_group ASV_800:uncultured_Planctomycetaceae ASV_765:uncultured_Pasteuria ASV_1894:f__Pirellulaceae ASV_1044:g__Gemmata ASV_939:uncultured_Prosthecobacter
## Compartment: Rhizhomorph
## Number of core taxa: 990
## Some core taxa: ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_779:g__WD2101_soil_group ASV_1560:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage
## Compartment: Soil associated
## Number of core taxa: 1735
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_2133:g__WD2101_soil_group ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_N <- list_core_taxaN[desired_order]
# Circular venn
plot_venn_venn_N <- plot(venn(list_core_ordered_N), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_N <- euler(list_core_ordered_N, shape = "ellipse")
plot_euler_venn_N <- plot(euler_venn_N, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_Nplot_euler_venn_NCombine plots
library(cowplot)
#Combine venn plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_venn_venn_fmt <- plot_grid(plot_venn_venn, labels = c("A"), ncol = 1)
venn_loc_plots <- plot_grid(plot_venn_venn_EC, plot_venn_venn_MO, plot_venn_venn_N, labels = c("B", "C", "D"), ncol = 3)
venn_all_plots <- plot_grid(title_vennplot, plot_venn_venn_fmt, venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))
venn_all_plotslibrary(cowplot)
#Combine euler plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_euler_venn_fmt <- plot_grid(plot_euler_venn, labels = c("A"), ncol = 1)
euler_venn_loc_plots <- plot_grid(plot_euler_venn_EC, plot_euler_venn_MO, plot_euler_venn_N, labels = c("B", "C", "D"), ncol = 3)
euler_venn_all_plots <- plot_grid(title_vennplot, plot_euler_venn_fmt, euler_venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))
euler_venn_all_plotsAll
#Factor Source
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Full all samples
ampv_heatmap_abundances_genus_full <- amp_heatmap(av2_full,
group_by = "Compartment",
facet_by = "Location",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))Locations
#Factor Source
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# El castillo
ampv_heatmap_abundances_genus_EC <- amp_heatmap(av2_ElCastillo,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
#Factor Source
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Monte Obscuro
ampv_heatmap_abundances_genus_MO <- amp_heatmap(av2_MonteObscuro,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
#Factor Source
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Naolinco
ampv_heatmap_abundances_genus_N <- amp_heatmap(av2_Naolinco,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))Combine plots
library(cowplot)
#Combine venn plot
title_abund_plot <- ggdraw() + draw_label("Relative Abundance", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
ampv_abund_all_g_fmt <- plot_grid(ampv_heatmap_abundances_genus_full, labels = c("A"), ncol = 1)## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_abund_loc_g <- plot_grid(ampv_heatmap_abundances_genus_EC, ampv_heatmap_abundances_genus_MO, ampv_heatmap_abundances_genus_N, labels = c("B", "C", "D"), ncol = 3)## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_all_g_plots <- plot_grid(title_abund_plot, ampv_abund_all_g_fmt, ampv_abund_loc_g, ncol = 1, rel_heights = c(0.2, 1, 1))
ampv_all_g_plots
### Core
#Factor Source
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Full all samples
ampv_heatmap_abundances_genus_core <- amp_heatmap(av2_core,
group_by = "Compartment",
facet_by = "Location",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
ampv_heatmap_abundances_genus_core## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
All
library(ampvis2)
library(ggplot2)
#Factor Source
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_full <- amp_rankabundance(av2_full, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
#ggsave("results/plots/02.diversity/02.rank_abundance.png", rank_ab , width = 5, height = 4)Location
# El Castillo
#Factor Source
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_EC <- amp_rankabundance(av2_ElCastillo, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
# Monte Obscuro
#Factor Source
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_MO <- amp_rankabundance(av2_MonteObscuro, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
# Naolinco
#Factor Source
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_N <- amp_rankabundance(av2_Naolinco, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()Combine
library(cowplot)
#Combine euler plot
title_rank_plot <- ggdraw() + draw_label("Relative diversity and species evenness", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
rank_ab_full_EC <- plot_grid(rank_ab_full, rank_ab_EC, labels = c("A", "B"), ncol = 2)## Warning: Removed 1887 rows containing missing values or values outside the scale range
## (`geom_line()`).
rank_ab_MO_N_plots <- plot_grid(rank_ab_MO, rank_ab_N, labels = c("C", "D"), ncol = 2)
rankabund_all_plots <- plot_grid(title_rank_plot, rank_ab_full_EC, rank_ab_MO_N_plots, ncol = 1, rel_heights = c(0.16, 1, 1))
rankabund_all_plotslibrary(ampvis2)
library(ggplot2)
#Factor Source
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_full_core <- amp_rankabundance(av2_core, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
rank_ab_full_corelibrary(hilldiv)## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ microbiome::alpha() masks ggplot2::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(kableExtra)##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Get hill numbers
q0 <- hill_div(otu_table_fltr, qvalue = 0)
q1 <- hill_div(otu_table_fltr, qvalue = 1)
q2 <- hill_div(otu_table_fltr, qvalue = 2)
# Merge metadata with Hill numbers
q012_all <- cbind(q0, q1, q2) %>% as.data.frame() %>% rownames_to_column(var = "sample-id")
metadata_with_hill_fltr <- q012_all %>%
inner_join(metadata_fltr, by = c("sample-id"="sample-id"))
#save table
write.table(metadata_with_hill_fltr, "results/03.Diversity/Metadata_with_hill.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# show
metadata_with_hill_fltr %>% head() %>% kable()| sample-id | q0 | q1 | q2 | sample | Source | Date | Year | Location | Week | Raw reads | Colect_number | Specimen | quant_reading | Effective_reads | ASVs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DR1014-E | 135 | 9.036873 | 3.171788 | DR1014-E | Stipe | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 159896 | DRamos 1014 | Ind_1 | 948 | 68295 | 135 |
| DR1014-SAH | 2118 | 1145.961220 | 576.966109 | DR1014-SAH | Soil associated | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 223804 | DRamos 1014 | Ind_1 | 3071 | 40974 | 2118 |
| DR1018-E | 768 | 47.874948 | 5.350837 | DR1018-E | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 68578 | DRamos 1018 | Ind_3 | 97 | 23890 | 768 |
| DR1018-SAH | 1824 | 887.566616 | 436.200764 | DR1018-SAH | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 211120 | DRamos 1018 | Ind_3 | 4017 | 38161 | 1824 |
| DR1019-E | 1012 | 447.167799 | 145.072574 | DR1019-E | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 144270 | DRamos 1019 | Ind_4 | 215 | 33853 | 1012 |
| DR1019-SAH | 2202 | 1317.872289 | 723.411536 | DR1019-SAH | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 185380 | DRamos 1019 | Ind_4 | 3754 | 34576 | 2202 |
Get means
# Reorder q values
meta_qs <- metadata_with_hill_fltr %>%
pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0", "q1", "q2")) %>%
mutate(
qs = case_when(
q == "q0" ~ "q=0",
q == "q1" ~ "q=1",
q == "q2" ~ "q=2"
))
#Get means of Hill numbers
means <- meta_qs %>% group_by(Source, Location, qs) %>%
summarise(means = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
.groups = 'drop')
#save table
write.table(means, "results/03.Diversity/Hill_means_sd.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
print(means)## # A tibble: 27 × 5
## Source Location qs means sd
## <fct> <fct> <chr> <dbl> <dbl>
## 1 Soil associated El Castillo/Zentla q=0 1378. 201.
## 2 Soil associated El Castillo/Zentla q=1 531. 88.4
## 3 Soil associated El Castillo/Zentla q=2 183. 47.6
## 4 Soil associated Monte obscuro Emiliano Zapata q=0 2100. 154.
## 5 Soil associated Monte obscuro Emiliano Zapata q=1 1139. 142.
## 6 Soil associated Monte obscuro Emiliano Zapata q=2 591. 114.
## 7 Soil associated Naolinco q=0 1733. 131.
## 8 Soil associated Naolinco q=1 524. 165.
## 9 Soil associated Naolinco q=2 146. 101.
## 10 Rhizhomorph El Castillo/Zentla q=0 1103. 256.
## # ℹ 17 more rows
library(ggpubr) ##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
## The following object is masked from 'package:qiime2R':
##
## mean_sd
# factor to reorder plot
metadata_with_hill_fltr$Location <- factor(metadata_with_hill_fltr$Location)
metadata_with_hill_fltr$Compartment <- factor(metadata_with_hill_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
#plot
hill_barplot_explore_fltr <- metadata_with_hill_fltr %>%
pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0", "q1", "q2")) %>%
mutate(
qs = case_when(
q == "q0" ~ "q=0",
q == "q1" ~ "q=1",
q == "q2" ~ "q=2"
)) %>%
ggbarplot(
x = "Compartment",
y = "value",
add = "mean_se",
facet.by = c("qs", "Location"),
fill = "Compartment"
) +
scale_fill_jco() +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
theme(
strip.background = element_blank(), #element_rect(fill = "")
strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
#panel.border = element_blank(),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "", #,
title = "Biodiversity across Locations and Compartments"
) + #+
#theme(plot.title = element_text(hjust = 0.5))
theme(legend.text = element_text(size = 12)) #+
#stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")
hill_barplot_explore_fltrggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)Alpha diversity depth correlation to order q=0
#install.packages("ggpubr")
library(ggpubr)
q0_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q0",
xlab= "Sequencing depth (number of reads)",
ylab = "q=0",
#ylab="Alpha diversity q=0 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")Alpha diversity depth correlation to order q=1
q1_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q1",
xlab= "Sequencing depth (number of reads)",
ylab = "q=1",
#ylab="Alpha diversity q=1 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")Alpha diversity depth correlation to order q=2
q2_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q2",
xlab= "Sequencing depth (number of reads)",
ylab = "q=2",
#ylab="Alpha diversity q=2 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")library(ggplot2)
library(cowplot)
#save plots
#Combine plot
title_corr_plot <- ggdraw() + draw_label("Alpha diversity depth correlation with fltr samples")
correlation_plot_q012_fltr <- plot_grid(title_corr_plot, q0_vs_depth_fltr,q1_vs_depth_fltr,q2_vs_depth_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
correlation_plot_q012_fltr#save plot
ggsave("results/plots/02.diversity/03.alpha_depth_correlation_fltr_samples.png", correlation_plot_q012_fltr, width = 8, height = 8)Shapiro test to Hill numbers
library(ggplot2)
library(cowplot)
#Shapiro test
shapiro_q0_fltr <- shapiro.test(metadata_with_hill_fltr$q0)
shapiro_q0_fltr_pvalue <- round(shapiro_q0_fltr$p.value, 5)
shapiro_q1_fltr <- shapiro.test(metadata_with_hill_fltr$q1)
shapiro_q1_fltr_pvalue <- round(shapiro_q1_fltr$p.value, 5)
shapiro_q2_fltr <- shapiro.test(metadata_with_hill_fltr$q2)
shapiro_q2_fltr_pvalue <- round(shapiro_q2_fltr$p.value, 9)
#Histograms
histplot_q0_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q0, xlab="q=0")) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q0_fltr_pvalue)) +
theme_bw() + xlab("q=0") + ylab("Frequency")
histplot_q1_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q1)) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q1_fltr_pvalue)) +
theme_bw() + xlab("q=1") + ylab("Frequency")
histplot_q2_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q2)) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q2_fltr_pvalue)) +
theme_bw() + xlab("q=2") + ylab("Frequency")
#Combine plot
title_plot <- ggdraw() + draw_label("Histogram of Hill diversity") #, fontface = 'bold', x = 0.5, hjust = 0.5)
histplot_q012_fltr <- plot_grid(title_plot, histplot_q0_fltr, histplot_q1_fltr, histplot_q2_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
histplot_q012_fltr#save plot
ggsave("results/plots/02.diversity/04.hill_normality_test_histogram_fltr_samples.png", histplot_q012_fltr)## Saving 7 x 5 in image
A pesar de que q0 tiene una distribución normal, dado que los datos del proyecto tienen medidas repetidas y desbalanceo, lo adecuado es hacer comparaciones basadas en modelos. En este caso modelos lineales mixtos
Estos modelos son útiles cuando las observaciones tienen estructuras de dependencia, como datos longitudinales o datos agrupados/anidados. Los modelos lineales mixtos incluyen tanto efectos fijos como efectos aleatorios, donde los efectos fijos son comunes a todas las unidades/individuos, y los efectos aleatorios varían por grupo o unidad.
lme
library(nlme)
# lme
# q0
q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q0_lme_perm_fltr <- PermTest(q0_lme_fltr, B = 100)
# q1
q1_lme_fltr <- lme(q1 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q1_lme_perm_fltr <- PermTest(q1_lme_fltr, B = 100)
# q2
q2_lme_fltr <- lme(q2 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q2_lme_perm_fltr <- PermTest(q2_lme_fltr, B = 100)glm
#q0 glm
q0_glm_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glm
q1_glm_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.036873
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1145.961220
## Warning in dpois(y, mu, log = TRUE): non-integer x = 47.874948
## Warning in dpois(y, mu, log = TRUE): non-integer x = 887.566616
## Warning in dpois(y, mu, log = TRUE): non-integer x = 447.167799
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1317.872289
## Warning in dpois(y, mu, log = TRUE): non-integer x = 375.937389
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1212.407272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 114.546532
## Warning in dpois(y, mu, log = TRUE): non-integer x = 689.583452
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1236.713620
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.775571
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1141.539572
## Warning in dpois(y, mu, log = TRUE): non-integer x = 198.051965
## Warning in dpois(y, mu, log = TRUE): non-integer x = 649.434099
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1033.757810
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.120060
## Warning in dpois(y, mu, log = TRUE): non-integer x = 236.465478
## Warning in dpois(y, mu, log = TRUE): non-integer x = 470.600863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.056484
## Warning in dpois(y, mu, log = TRUE): non-integer x = 191.591875
## Warning in dpois(y, mu, log = TRUE): non-integer x = 515.230864
## Warning in dpois(y, mu, log = TRUE): non-integer x = 118.919348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 286.196272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 521.533059
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.243804
## Warning in dpois(y, mu, log = TRUE): non-integer x = 483.242102
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.992440
## Warning in dpois(y, mu, log = TRUE): non-integer x = 76.214330
## Warning in dpois(y, mu, log = TRUE): non-integer x = 502.299672
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.362994
## Warning in dpois(y, mu, log = TRUE): non-integer x = 406.547029
## Warning in dpois(y, mu, log = TRUE): non-integer x = 610.775331
## Warning in dpois(y, mu, log = TRUE): non-integer x = 456.279074
## Warning in dpois(y, mu, log = TRUE): non-integer x = 631.344825
## Warning in dpois(y, mu, log = TRUE): non-integer x = 625.359839
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.302276
## Warning in dpois(y, mu, log = TRUE): non-integer x = 99.082917
## Warning in dpois(y, mu, log = TRUE): non-integer x = 384.027703
## Warning in dpois(y, mu, log = TRUE): non-integer x = 160.920694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 600.653719
## Warning in dpois(y, mu, log = TRUE): non-integer x = 667.285427
## Warning in dpois(y, mu, log = TRUE): non-integer x = 279.227229
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.358681
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.066246
## Warning in dpois(y, mu, log = TRUE): non-integer x = 141.925035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.144746
## Warning in dpois(y, mu, log = TRUE): non-integer x = 110.564833
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.028120
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.557778
## Warning in dpois(y, mu, log = TRUE): non-integer x = 211.697465
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.425755
## Warning in dpois(y, mu, log = TRUE): non-integer x = 351.503682
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.579458
## Warning in dpois(y, mu, log = TRUE): non-integer x = 250.463366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 444.833675
## Warning in dpois(y, mu, log = TRUE): non-integer x = 124.745022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 94.655639
## Warning in dpois(y, mu, log = TRUE): non-integer x = 305.856635
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.462631
## Warning in dpois(y, mu, log = TRUE): non-integer x = 177.480033
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.411210
## Warning in dpois(y, mu, log = TRUE): non-integer x = 516.220144
## Warning in dpois(y, mu, log = TRUE): non-integer x = 288.582443
## Warning in dpois(y, mu, log = TRUE): non-integer x = 398.115993
## Warning in dpois(y, mu, log = TRUE): non-integer x = 668.236422
## Warning in dpois(y, mu, log = TRUE): non-integer x = 486.264725
## Warning in dpois(y, mu, log = TRUE): non-integer x = 321.527670
## Warning in dpois(y, mu, log = TRUE): non-integer x = 531.867537
## Warning in dpois(y, mu, log = TRUE): non-integer x = 747.886370
## Warning in dpois(y, mu, log = TRUE): non-integer x = 769.539696
## Warning in dpois(y, mu, log = TRUE): non-integer x = 510.480594
#q2 glm
q2_glm_fltr <- glm(q2 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.171788
## Warning in dpois(y, mu, log = TRUE): non-integer x = 576.966109
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.350837
## Warning in dpois(y, mu, log = TRUE): non-integer x = 436.200764
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.072574
## Warning in dpois(y, mu, log = TRUE): non-integer x = 723.411536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 153.597535
## Warning in dpois(y, mu, log = TRUE): non-integer x = 666.541403
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.456157
## Warning in dpois(y, mu, log = TRUE): non-integer x = 178.728096
## Warning in dpois(y, mu, log = TRUE): non-integer x = 676.765401
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.375549
## Warning in dpois(y, mu, log = TRUE): non-integer x = 617.084909
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.482863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 234.062638
## Warning in dpois(y, mu, log = TRUE): non-integer x = 441.625630
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.057934
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.632737
## Warning in dpois(y, mu, log = TRUE): non-integer x = 216.098156
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.800651
## Warning in dpois(y, mu, log = TRUE): non-integer x = 43.314536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 139.622390
## Warning in dpois(y, mu, log = TRUE): non-integer x = 36.762232
## Warning in dpois(y, mu, log = TRUE): non-integer x = 79.269153
## Warning in dpois(y, mu, log = TRUE): non-integer x = 162.051399
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.485131
## Warning in dpois(y, mu, log = TRUE): non-integer x = 170.638980
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.551297
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.942091
## Warning in dpois(y, mu, log = TRUE): non-integer x = 127.678843
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.524882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 122.819172
## Warning in dpois(y, mu, log = TRUE): non-integer x = 209.071791
## Warning in dpois(y, mu, log = TRUE): non-integer x = 103.936882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 278.567311
## Warning in dpois(y, mu, log = TRUE): non-integer x = 168.692594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.734694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.701974
## Warning in dpois(y, mu, log = TRUE): non-integer x = 167.217679
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.685820
## Warning in dpois(y, mu, log = TRUE): non-integer x = 210.245304
## Warning in dpois(y, mu, log = TRUE): non-integer x = 284.919159
## Warning in dpois(y, mu, log = TRUE): non-integer x = 46.476316
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.680969
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.637827
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.473794
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.983116
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.873471
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.263018
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.779628
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.277613
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.602524
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.393270
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.950724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.948584
## Warning in dpois(y, mu, log = TRUE): non-integer x = 151.370352
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.340366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.849351
## Warning in dpois(y, mu, log = TRUE): non-integer x = 48.667022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.718140
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.618940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.535976
## Warning in dpois(y, mu, log = TRUE): non-integer x = 95.808905
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.011247
## Warning in dpois(y, mu, log = TRUE): non-integer x = 83.060795
## Warning in dpois(y, mu, log = TRUE): non-integer x = 238.121940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 164.033797
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.685546
## Warning in dpois(y, mu, log = TRUE): non-integer x = 81.619451
## Warning in dpois(y, mu, log = TRUE): non-integer x = 255.500307
## Warning in dpois(y, mu, log = TRUE): non-integer x = 347.159348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 90.468592
glm quasipoisson
#q0 glm quasipoisson
q0_glm_quasipoisson_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glm quasipoisson
q1_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)
#q2 glm quasipoisson
q2_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)glmer
#q0 glmer
q0_glmer_fltr <- glmer(q0 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glmer
q1_glmer_fltr <- glmer(q1 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q2 glmer
q2_glmer_fltr <- glmer(q2 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)glmmTMB
# glmmTMB
# q0
q0_glmmTMB_fltr <- glmmTMB(q0 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)
# q1
q1_glmmTMB_fltr <- glmmTMB(q1 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model
# q2
q2_glmmTMB_fltr <- glmmTMB(q2 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model
glmmTMB negative binomial
# glmmTMB
# q0
q0_glmmTMB_negbinml_fltr <- glmmTMB(q0 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)
# q1
q1_glmmTMB_negbinml_fltr <- glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model
# q2
q2_glmmTMB_negbinml_fltr <- glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model
calculate_AIC_quasiPoisson <- function(model) {
residuals <- residuals(model, type = "pearson")
deviance <- sum(residuals^2)
n <- length(residuals)
logLikelihood <- -0.5 * (n * log(2 * pi * deviance / n) + deviance)
k <- length(coef(model))
AIC_value <- 2 * k - 2 * logLikelihood
return(AIC_value)
}q012_gmodels_comparison_fltr <- data.frame(
Model = c("LME q0", "GLM Poisson q0", "GLM Quasi-Poisson q0", "GLMER q0", "GLMMTMB q0", "GLMMTMB_bn q0",
"LME q1", "GLM Poisson q1", "GLM Quasi-Poisson q1", "GLMER q1", "GLMMTMB q1","GLMMTMB_bn q1",
"LME q2", "GLM Poisson q2", "GLM Quasi-Poisson q2", "GLMER q2", "GLMMTMB q2","GLMMTMB_bn q2"),
AIC = c(AIC(q0_lme_fltr), AIC(q0_glm_fltr), calculate_AIC_quasiPoisson(q0_glm_quasipoisson_fltr), AIC(q0_glmer_fltr), AIC(q0_glmmTMB_fltr), AIC(q0_glmmTMB_negbinml_fltr),
AIC(q1_lme_fltr), AIC(q1_glm_fltr), calculate_AIC_quasiPoisson(q1_glm_quasipoisson_fltr), AIC(q1_glmer_fltr), AIC(q1_glmmTMB_fltr), AIC(q1_glmmTMB_negbinml_fltr),
AIC(q2_lme_fltr), AIC(q2_glm_fltr), calculate_AIC_quasiPoisson(q2_glm_quasipoisson_fltr), AIC(q2_glmer_fltr), AIC(q2_glmmTMB_fltr), AIC(q2_glmmTMB_negbinml_fltr)),
BIC = c(BIC(q0_lme_fltr), BIC(q0_glm_fltr), BIC(q0_glm_quasipoisson_fltr), BIC(q0_glmer_fltr), BIC(q0_glmmTMB_fltr), BIC(q0_glmmTMB_negbinml_fltr),
BIC(q1_lme_fltr), BIC(q1_glm_fltr), BIC(q1_glm_quasipoisson_fltr), BIC(q1_glmer_fltr), BIC(q1_glmmTMB_fltr), BIC(q1_glmmTMB_negbinml_fltr),
BIC(q2_lme_fltr), BIC(q2_glm_fltr), BIC(q2_glm_quasipoisson_fltr), BIC(q2_glmer_fltr), BIC(q2_glmmTMB_fltr), BIC(q2_glmmTMB_negbinml_fltr))
)
# show
q012_gmodels_comparison_fltrThe best AIC/BIC values are with lme, so, I will check lme model assumptions
Effect
# extract p-values fun
extract_pvalues <- function(perm_test_result) {
perm_test_result$resultats$p.value
}
# extract
lme_pvalues_q0_fltr <- extract_pvalues(q0_lme_perm_fltr)
lme_pvalues_q1_fltr <- extract_pvalues(q1_lme_perm_fltr)
lme_pvalues_q2_fltr <- extract_pvalues(q2_lme_perm_fltr)
# Dataframe p-values
library(dplyr)
lme_pvalues_table_fltr <- data.frame(
Hill = c("q0", "q1", "q2"),
`Intercept` = c(lme_pvalues_q0_fltr[1], lme_pvalues_q1_fltr[1], lme_pvalues_q2_fltr[1]),
`log(Depth)` = c(lme_pvalues_q0_fltr[2], lme_pvalues_q1_fltr[2], lme_pvalues_q2_fltr[2]),
`Source` = c(lme_pvalues_q0_fltr[3], lme_pvalues_q1_fltr[3], lme_pvalues_q2_fltr[3]),
`Location` = c(lme_pvalues_q0_fltr[4], lme_pvalues_q1_fltr[4], lme_pvalues_q2_fltr[4]),
`Source:Location` = c(lme_pvalues_q0_fltr[5], lme_pvalues_q1_fltr[5], lme_pvalues_q2_fltr[5])
)
# table
knitr::kable(t(lme_pvalues_table_fltr), caption = "P-values from Permuted LME")| Hill | q0 | q1 | q2 |
| Intercept | 0.01 | 0.00 | 0.01 |
| log.Depth. | 0.07 | 0.00 | 0.01 |
| Source | 0 | 0 | 0 |
| Location | 0 | 0 | 0 |
| Source.Location | 0 | 0 | 0 |
# save
write.csv(t(lme_pvalues_table_fltr), "results/03.Diversity/P-values_from_Permuted_LME_fltr.csv", row.names = FALSE)Ok, aquí parece que a excepción de log(Effective reads) Source, Location y la interacción tienen un efecto significativo en la diversidad q0, q1 y q2.
Evaluemos los supuestos:
Homocedasticidad
bptest, solo funciona para lm no para lme :(
#Homocedasticidad p-value >0.05 si cumple supuesto de homocedasticidad
library(lmtest)
# q0
Homoce_lme_q0_fltr <- bptest(q0_lme_fltr)
Homoce_lme_q0_fltr
# q1
Homoce_lme_q1_fltr <- bptest(q1_lme_fltr)
Homoce_lme_q1_fltr
# q2
Homoce_lme_q2_fltr <- bptest(q2_lme_fltr)
Homoce_lme_q2_fltrq0_lme_perm_fltr##
## Monte-Carlo test
##
## Call:
## PermTest.lme(obj = q0_lme_fltr, B = 100)
##
## Based on 100 replicates
## Simulated p-value:
## p.value
## (Intercept) 0.01
## log(Effective_reads) 0.07
## Source 0.00
## Location 0.00
## Source:Location 0.00
Check with easystats
q=0
#easystats::install_suggested()
library(easystats)## # Attaching packages: easystats 0.7.1 (red = needs update)
## ✔ bayestestR 0.13.2 ✔ correlation 0.8.4
## ✔ datawizard 0.10.0 ✔ effectsize 0.8.7
## ✔ insight 0.19.10 ✔ modelbased 0.8.7
## ✔ performance 0.11.0 ✔ parameters 0.21.6
## ✔ report 0.5.8 ✖ see 0.8.3
##
## Restart the R-Session and update packages with `easystats::easystats_update()`.
#Remember model # q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
check_q0_lme_1 <- check_model(q0_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q0_lme_1)check_q0_lme_2 <- check_model(update(q0_lme_fltr, .~. -Source:Location))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q0_lme_2)
Al eliminar la interacción mejoró la colinealidad y la normalidad y
linearidad se mantuvieron adecuadas. Me quedo entonces con este modelo
ajustado
q=1
check_q1_lme_1 <- check_model(q1_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q1_lme_1)check_q1_lme_2 <- check_model(update(q1_lme_fltr, .~. -Source:Location))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q1_lme_2)check_q1_lme_3 <- check_model(update(q1_lme_fltr, .~. -log(Effective_reads)))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q1_lme_3)q1_lme_fltr_wtotER <- update(q1_lme_fltr, .~. -log(Effective_reads))
check_q1_lme_4 <- check_model(update(q1_lme_fltr_wtotER, .~. -Source:Location))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q1_lme_4)Pues realmente, no mejoró mucho la linealidad, asi que seguiré con el modelo que después de lme dió mejores valores de AIC/BIC.
q=2
check_q2_lme_1 <- check_model(q2_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
check_q2_lme_1
Pff este si estuvo muy cónico, next!
Sólo evaluaré a q1 y q2 xq q0 estuvo chido con lme
q=1
check_q1_glmmTMB_bn_1 <- check_model(q1_glmmTMB_negbinml_fltr)## `check_outliers()` does not yet support models of class `glmmTMB`.
check_q1_glmmTMB_bn_1check_q1_glmmTMB_bn_2 <- check_model(update(q1_glmmTMB_negbinml_fltr, .~. -Source:Location))## Warning in glmmTMB(formula = q1 ~ log(Effective_reads) + Source + Location + :
## non-integer counts in a nbinom2 model
## `check_outliers()` does not yet support models of class `glmmTMB`.
check_q1_glmmTMB_bn_2Pues ninguno esta chido, hay sobredispersión, la homogeneidad de la varianza es creciente, ósea hay heterocedasticidad. La colinealidad mejora cuando se quita la interacción pero aun asi no parece ser el modelo adecuado.
Veamos con quasipoisson que si bien no tuvo el mejor AIC/BIC es el que recomendaron en MicrobiomaLab que deberia seguir despues de LME
check_q1_glm_qp_1 <- check_model(q1_glm_quasipoisson_fltr)
check_q1_glm_qp_1check_q1_glm_qp_2 <- check_model(update(q1_glm_quasipoisson_fltr, .~. -Source:Location))
check_q1_glm_qp_2
Ya ni entiendo que show
Provaré con el que sigue de mejor AIC/BIC
check_q1_glmmTMB_1 <- check_model(q1_glmmTMB_fltr)## `check_outliers()` does not yet support models of class `glmmTMB`.
print(check_q1_glmmTMB_1)check_q1_glmmTMB_2 <- check_model(update(q1_glmmTMB_fltr, .~. -Source:Location))## Warning in glmmTMB(formula = q1 ~ log(Effective_reads) + Source + Location + :
## non-integer counts in a poisson model
## `check_outliers()` does not yet support models of class `glmmTMB`.
print(check_q1_glmmTMB_2)Definitivamente estoy perdida con todo esto!!!!
#this was the best model adjust
q0_lme_fltr_whotinteract <- update(q0_lme_fltr, .~. -Source:Location)q0_lme_means_1 <- emmeans(q0_lme_fltr_whotinteract, pairwise ~ Source + Location)
q0_lme_means_1_cld <- multcomp::cld(object = q0_lme_means_1$emmeans,
Letters = letters)
q0_lme_means_1_cld <- as.data.frame(q0_lme_means_1_cld)
q0_lme_means_1_cld# Asumiendo que las columnas 'Source' y 'Location' ya están correctamente formateadas y que 'q0_lme_means_1_cld' contiene una columna '.group'
metadata_with_hill_fltr_q0_letters <- merge(metadata_with_hill_fltr, q0_lme_means_1_cld, by = c("Source", "Location"), all.x = TRUE)library(ggplot2)
library(ggpubr)
# factor to reorder plot
metadata_with_hill_fltr_q0_letters$Location <- factor(metadata_with_hill_fltr_q0_letters$Location, levels = c("Monte obscuro Emiliano Zapata", "Naolinco", "El Castillo/Zentla"))
metadata_with_hill_fltr_q0_letters$Compartment <- factor(metadata_with_hill_fltr_q0_letters$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# group significance
metadata_with_hill_fltr_q0_letters2 <- metadata_with_hill_fltr_q0_letters %>%
group_by(Compartment, Location) %>%
summarise(
q0 = mean(q0, na.rm = TRUE),
.group = first(.group))## `summarise()` has grouped output by 'Compartment'. You can override using the
## `.groups` argument.
#plot
q0_barplot_sig <- metadata_with_hill_fltr_q0_letters %>%
ggbarplot(
x = "Compartment",
y = "q0",
add = "mean_se",
facet.by = "Location",
fill = "Compartment"
) +
scale_fill_jco() +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
geom_text(aes(label = .group), position = position_dodge(0.7), vjust = -0.5) +
facet_wrap(~ Location, scales = "free_y") +
theme(
strip.background = element_blank(), #element_rect(fill = "")
strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
#panel.border = element_blank(),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "", #,
title = "Biodiversity across Locations and Compartments"
) + #+
#theme(plot.title = element_text(hjust = 0.5))
theme(legend.text = element_text(size = 12))
print(q0_barplot_sig)library(ggplot2)
library(ggpubr)
# Refactor
metadata_with_hill_fltr_q0_letters$Location <- factor(
metadata_with_hill_fltr_q0_letters$Location,
levels = c("Monte obscuro Emiliano Zapata", "Naolinco", "El Castillo/Zentla")
)
metadata_with_hill_fltr_q0_letters$Compartment <- factor(
metadata_with_hill_fltr_q0_letters$Compartment,
levels = c("Soil associated", "Rhizhomorph", "Stipe")
)
# group sig
metadata_with_hill_fltr_q0_letters2 <- metadata_with_hill_fltr_q0_letters %>%
group_by(Compartment, Location) %>%
summarise(
q0 = mean(q0, na.rm = TRUE),
.group = first(.group))## `summarise()` has grouped output by 'Compartment'. You can override using the
## `.groups` argument.
# barplot
q0_barplot_sig <- ggbarplot(
data = metadata_with_hill_fltr_q0_letters2,
x = "Compartment",
y = "q0",
add = "mean_se", # Añadir barras de error que representan el error estándar
facet.by = "Location",
fill = "Compartment",
position = position_dodge(0.7),
color = "black",
palette = "jco"
) + geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
theme_bw()+
geom_text(aes(label = .group), position = position_dodge(0.7), vjust = -0.5) +
theme(
strip.background = element_blank(),
strip.text.x = element_text(face = "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face = "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "",
title = "Biodiversity across Locations and Compartments"
)## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
# Imprimir el gráfico
print(q0_barplot_sig)library(ggplot2)
library(ggpubr)
# Reordenar los niveles para el plot
metadata_with_hill_fltr_q0_letters$Location <- factor(
metadata_with_hill_fltr_q0_letters$Location,
levels = c("Monte obscuro Emiliano Zapata", "Naolinco", "El Castillo/Zentla")
)
metadata_with_hill_fltr_q0_letters$Compartment <- factor(
metadata_with_hill_fltr_q0_letters$Compartment,
levels = c("Soil associated", "Rhizhomorph", "Stipe")
)
# Crear el gráfico usando ggbarplot
q0_barplot_sig <- ggbarplot(
data = metadata_with_hill_fltr_q0_letters,
x = "Compartment",
y = "q0",
add = "mean_se", # Añadir barras de error que representan el error estándar
facet.by = "Location",
fill = "Compartment",
color = "black",
position = position_dodge(0.7),
palette = "jco"
) +
geom_jitter(
aes(color = Compartment),
position = position_jitter(width = 0.1),
size = 0.8
) +
stat_summary(
fun.data = "mean_se",
geom = "errorbar",
width = 0.2,
position = position_dodge(0.7),
aes(group = Compartment)
) +
stat_summary(
fun = mean,
geom = "point",
shape = 18,
size = 3,
position = position_dodge(0.7),
color = "black"
) +
geom_text(
aes(label = .group),
position = position_dodge(0.7),
vjust = -0.5
) +
theme_minimal() +
theme(
strip.background = element_blank(),
strip.text.x = element_text(face = "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face = "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "",
title = "Biodiversity across Locations and Compartments"
)
# Imprimir el gráfico
print(q0_barplot_sig)library(ggplot2)
# Configurar los datos
metadata_with_hill_fltr_q0_letters$Location <- factor(metadata_with_hill_fltr_q0_letters$Location,
levels = c("Monte obscuro Emiliano Zapata", "Naolinco", "El Castillo/Zentla"))
metadata_with_hill_fltr_q0_letters$Compartment <- factor(metadata_with_hill_fltr_q0_letters$Compartment,
levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Crear el gráfico
plot <- ggplot(metadata_with_hill_fltr_q0_letters, aes(x = Compartment, y = q0, fill = Compartment)) +
geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
stat_summary(
fun.data = "mean_se", geom = "errorbar", width = 0.2,
position = position_dodge(0.9),
aes(group = Compartment)
) +
stat_summary(
fun = mean, geom = "text", aes(label = .group),
position = position_dodge(width = 0.7), vjust = -1.9, # Ajuste para mejorar la posición vertical
show.legend = FALSE,
size = 4 # Tamaño de la fuente
) +
facet_wrap(~ Location, scales = "free_y") +
scale_fill_jco() +
theme_bw() +
theme(
legend.position = "bottom",
strip.background = element_blank(),
strip.text.x = element_text(face = "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face = "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11)
) +
labs(
fill = "Compartment",
y = "",
x = "",
title = "Biodiversity across Locations and Compartments"
)
# Imprimir el gráfico
print(plot)ok, mi plot es un desastre, pero eso no es lo peor, lo malo es que estas significancias estan raras no?
library(hilldiv)
library(tidyverse)
library(kableExtra)
# Get hill numbers
q0_c <- hill_div(otu_table_full_core, qvalue = 0)
q1_c <- hill_div(otu_table_full_core, qvalue = 1)
q2_c <- hill_div(otu_table_full_core, qvalue = 2)
# Merge metadata with Hill numbers
q012_c <- cbind(q0_c, q1_c, q2_c) %>% as.data.frame() %>% rownames_to_column(var = "sample")
metadata_with_hill_core <- q012_c %>%
inner_join(meta_full_core, by = c("sample"="sample"))
#save table
write.table(metadata_with_hill_core, "results/03.Diversity/Metadata_with_hill_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# show
metadata_with_hill_core %>% head() %>% kable()| sample | q0_c | q1_c | q2_c | Source | Date | Year | Location | Week | Raw.reads | Colect_number | Specimen | quant_reading | Effective_reads | ASVs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DR1014-E | 33 | 5.937934 | 3.364797 | Stipe | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 159896 | DRamos 1014 | Ind_1 | 948 | 68295 | 135 |
| DR1014-SAH | 144 | 89.472107 | 62.663860 | Soil associated | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 223804 | DRamos 1014 | Ind_1 | 3071 | 40974 | 2118 |
| DR1018-E | 122 | 6.037923 | 2.004335 | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 68578 | DRamos 1018 | Ind_3 | 97 | 23890 | 768 |
| DR1018-SAH | 153 | 91.533182 | 64.969780 | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 211120 | DRamos 1018 | Ind_3 | 4017 | 38161 | 1824 |
| DR1019-E | 122 | 61.454388 | 33.429425 | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 144270 | DRamos 1019 | Ind_4 | 215 | 33853 | 1012 |
| DR1019-SAH | 153 | 96.430497 | 66.569922 | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 185380 | DRamos 1019 | Ind_4 | 3754 | 34576 | 2202 |
Get means
# Reorder q values
meta_qs_c <- metadata_with_hill_core %>%
pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
mutate(
qs = case_when(
q == "q0_c" ~ "q=0",
q == "q1_c" ~ "q=1",
q == "q2_c" ~ "q=2"
))
#Get means of Hill numbers
means_c <- meta_qs_c %>% group_by(Source, Location, qs) %>%
summarise(means = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
.groups = 'drop')
#save table
write.table(means_c, "results/03.Diversity/Hill_means_sd_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
print(means_c)## # A tibble: 27 × 5
## Source Location qs means sd
## <fct> <chr> <chr> <dbl> <dbl>
## 1 Soil associated El Castillo/Zentla q=0 234. 22.1
## 2 Soil associated El Castillo/Zentla q=1 96.2 8.67
## 3 Soil associated El Castillo/Zentla q=2 44.4 9.64
## 4 Soil associated Monte obscuro Emiliano Zapata q=0 156. 9.41
## 5 Soil associated Monte obscuro Emiliano Zapata q=1 96.2 6.49
## 6 Soil associated Monte obscuro Emiliano Zapata q=2 66.7 8.14
## 7 Soil associated Naolinco q=0 327. 19.2
## 8 Soil associated Naolinco q=1 101. 22.0
## 9 Soil associated Naolinco q=2 38.0 16.4
## 10 Rhizhomorph El Castillo/Zentla q=0 223. 21.0
## # ℹ 17 more rows
library(ggpubr)
# factor to reorder plot
metadata_with_hill_core$Location <- factor(metadata_with_hill_core$Location)
metadata_with_hill_core$Compartment <- factor(metadata_with_hill_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
#plot
hill_barplot_explore_core <- metadata_with_hill_core %>%
pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
mutate(
qs = case_when(
q == "q0_c" ~ "q=0",
q == "q1_c" ~ "q=1",
q == "q2_c" ~ "q=2"
)) %>%
ggbarplot(
x = "Compartment",
y = "value",
add = "mean_se",
facet.by = c("qs", "Location"),
fill = "Compartment"
) +
scale_fill_jco() +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
theme(
strip.background = element_blank(), #element_rect(fill = "")
strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
#panel.border = element_blank(),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "", #,
title = "Biodiversity across Locations and Compartments of Core Microbiota"
) + #+
#theme(plot.title = element_text(hjust = 0.5))
theme(legend.text = element_text(size = 12)) #+
#stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")
hill_barplot_explore_core#ggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)Prepare data
Explore
aldex_clr_data <- t(getMonteCarloSample(aldex_clr,1))
pca <- prcomp(aldex_clr_data)
meta=metadata_with_hill_fltr %>% dplyr::rename(SampleID="sample-id")library(ggplot2)
library(tidyverse)
library(ggsci)
pca_plot<- ggplot() +
geom_segment(data=data.frame(pca$rotation) %>% #arrows
rownames_to_column(var = "Feature.ID")%>%
mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
top_n(8, a) %>% #keep 10 furthest away points
mutate(PC1=PC1*50, PC2=PC2*50),
aes(x=0, xend=PC1, y=0, yend=PC2),
arrow = arrow(length = unit(0.3,"cm")))+
geom_point(data=data.frame(pca$x) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=PC1, y=PC2, fill=Source),shape=21, size=2) +
geom_vline(xintercept = 0, linetype = 2) + #lines-cross
geom_hline(yintercept = 0, linetype = 2) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical") +
geom_polygon(data=data.frame(pca$x) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID")%>%
drop_na() %>%
group_by(Location) %>%
slice(chull(PC1, PC2)),
aes(x=PC1, y=PC2, fill=Source, color=Source),
alpha = 0.3,
show.legend = FALSE)+
ylab("PC2")+xlab("PC1")+ theme_bw()
# ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>% #arrows
# rownames_to_column(var = "Feature.ID")%>%
#mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
#top_n(8, a) %>% #keep 10 furthest away points
#mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
# taxonomys)%>% dplyr::select(
#Taxon, PC1, PC2, Feature.ID)%>%
#mutate_at(
# c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
# tax= str_extract(Taxon, "[^__]+$")) %>%
#mutate_at(c("tax"), funs(tax = case_when(
# tax=="Fungi" ~ "Unidentified",
#tax=="sajor_caju" ~ "Lentinus",
#TRUE~as.character(tax)))),
#aes(x=PC1, y=PC2, label= tax),
#segment.colour = NA, col = 'black', fill= "#EEEEEE",
#fontface="italic", box.padding = 0.2, size=4)
pca_plotlibrary(ggplot2)
library(dplyr)
library(tidyr)
# Reorder
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# plot
pca_plot <- ggplot(data = data.frame(pca$x) %>%
rownames_to_column(var = "SampleID") %>%
left_join(meta, by = "SampleID")) +
geom_segment(data = data.frame(pca$rotation) %>%
rownames_to_column(var = "Feature.ID") %>%
mutate(a = sqrt(PC1^2 + PC2^2)) %>%
top_n(8, a) %>%
mutate(PC1 = PC1*50, PC2 = PC2*50),
aes(x = 0, xend = PC1, y = 0, yend = PC2),
arrow = arrow(length = unit(0.01,"cm"))) +
geom_point(aes(x = PC1, y = PC2, color = Source, shape = Location), size = 3) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
scale_color_jco() +
scale_shape_manual(values = c(15, 16, 17)) + # Ajusta los valores al número de categorías de 'Location'
scale_fill_jco() +
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical") +
labs(y = "PC2", x = "PC1", color = "Source", shape = "Location") +
theme_bw()
# ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>% #arrows
# rownames_to_column(var = "Feature.ID")%>%
# mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
# top_n(8, a) %>% #keep 10 furthest away points
# mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
# taxonomys)%>% dplyr::select(
# Taxon, PC1, PC2, Feature.ID)%>%
# mutate_at(
# c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
# tax= str_extract(Taxon, "[^__]+$")) %>%
# mutate_at(c("tax"), funs(tax = case_when(
# tax=="Fungi" ~ "Unidentified",
# tax=="sajor_caju" ~ "Lentinus",
# TRUE~as.character(tax)))),
# aes(x=PC1, y=PC2, label= tax),
# segment.colour = NA, col = 'black', fill= "#EEEEEE",
# fontface="italic", box.padding = 0.2, size=4)
pca_plotnmds_source_clr = vegan::metaMDS(aldex_clr_data, trymax = 20, k = 2)
var_stress_nmds_clr <- round(nmds_source_clr$stress, 5)
var_stress_nmds_clr
scores_source_clr= nmds_source_clr %>% vegan::scores()
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_clr<- ggplot() +
geom_point(data=data.frame(scores_source_clr) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_clr <- nmds_plot_clr +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_clr
Me marca un error:
Error in eigen(-x/2, symmetric = TRUE) :
infinite or missing values in 'x'
# Bray NMDS bray
library(vegan)## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
##
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
##
## diversity
bray=vegdist(t(otu_table_fltr), method = "bray")
nmds_source_bray = vegan::metaMDS(bray, trymax = 20, k = 2) ## Run 0 stress 0.173438
## Run 1 stress 0.1936625
## Run 2 stress 0.1927455
## Run 3 stress 0.1981773
## Run 4 stress 0.1897986
## Run 5 stress 0.1745998
## Run 6 stress 0.212297
## Run 7 stress 0.1835367
## Run 8 stress 0.1849069
## Run 9 stress 0.1813691
## Run 10 stress 0.1853846
## Run 11 stress 0.1825196
## Run 12 stress 0.189546
## Run 13 stress 0.1747444
## Run 14 stress 0.1937153
## Run 15 stress 0.174599
## Run 16 stress 0.1822082
## Run 17 stress 0.1730464
## ... New best solution
## ... Procrustes: rmse 0.01352697 max resid 0.0855995
## Run 18 stress 0.1791131
## Run 19 stress 0.1850546
## Run 20 stress 0.1852807
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
var_stress_nmds_bray <- round(nmds_source_bray$stress, 5)
var_stress_nmds_bray## [1] 0.17305
scores_source_bray= nmds_source_bray %>% vegan::scores()
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_bray<- ggplot() +
geom_point(data=data.frame(scores_source_bray) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(metadata_fltr, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_bray <- nmds_plot_bray +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_bray),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_bray# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
aitch=vegdist(t(otu_table_fltr_pseudo), method = "aitchison")
nmds_source_aitch = vegan::metaMDS(aitch, trymax = 20, k = 2) ## Run 0 stress 0.1675395
## Run 1 stress 0.1866725
## Run 2 stress 0.1707758
## Run 3 stress 0.1960236
## Run 4 stress 0.1829956
## Run 5 stress 0.2168936
## Run 6 stress 0.1692788
## Run 7 stress 0.1826432
## Run 8 stress 0.2147554
## Run 9 stress 0.2076152
## Run 10 stress 0.1678749
## ... Procrustes: rmse 0.01046227 max resid 0.05613392
## Run 11 stress 0.1750972
## Run 12 stress 0.1860202
## Run 13 stress 0.1696013
## Run 14 stress 0.2695321
## Run 15 stress 0.271712
## Run 16 stress 0.1825821
## Run 17 stress 0.1941716
## Run 18 stress 0.1835351
## Run 19 stress 0.1934639
## Run 20 stress 0.1899538
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
var_stress_nmds_aitch <- round(nmds_source_aitch$stress, 5)
var_stress_nmds_aitch## [1] 0.16754
scores_source_aitch= nmds_source_aitch %>% vegan::scores()
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_aitch<- ggplot() +
geom_point(data=data.frame(scores_source_aitch) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(metadata_fltr, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_aitch <- nmds_plot_aitch +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitchlibrary(vegan)
bray_dist =vegdist(t(otu_table_fltr), method = "bray")
pcoas=wcmdscale(bray_dist, eig = T)
pcoa_plot<- ggplot() +
geom_point(data=data.frame(pcoas$points) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=Dim1, y=Dim2,color = Source, shape = Location ), size=4) +
#geom_vline(xintercept = 0, linetype = 2) + #lines-cross
#geom_hline(yintercept = 0, linetype = 2) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw()+
#geom_polygon(data=data.frame(pca$x) %>% #individuals
#rownames_to_column(var = "SampleID")%>%
#left_join(metadata, by = "SampleID")%>%
#drop_na() %>%
#group_by(Type_of_soil) %>%
#slice(chull(PC1, PC2)),
#aes(x=PC1, y=PC2, fill=Type_of_soil, color=Type_of_soil),
#alpha = 0.3,
#show.legend = FALSE)+
ylab("PCoA2")+xlab("PCoA1")
pcoa_plotaitchison independientes por loction
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(cowplot) # Cargar la librería para combinar gráficos
# Agregar pseudocount y calcular la distancia de Aitchison para cada 'Location'
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
# Crear una lista para guardar los resultados de NMDS y los valores de estrés por 'Location'
nmds_list <- list()
stress_values <- c()
# Realizar NMDS para cada 'Location' y guardar los resultados en la lista
unique_locations <- unique(meta$Location)
for (loc in unique_locations) {
# Filtrar la tabla OTU por 'Location'
otu_subset <- otu_table_fltr_pseudo[, meta$Location == loc, drop=FALSE]
aitch <- vegdist(t(otu_subset), method = "aitchison")
# Ejecutar NMDS
nmds_result <- metaMDS(aitch, trymax = 20, k = 3)
# Guardar los resultados y valores de estrés en la lista
nmds_list[[loc]] <- nmds_result
stress_values[loc] <- nmds_result$stress
}## Run 0 stress 0.09286269
## Run 1 stress 0.0901747
## ... New best solution
## ... Procrustes: rmse 0.04371649 max resid 0.1230049
## Run 2 stress 0.09017497
## ... Procrustes: rmse 0.0001111472 max resid 0.0002876607
## ... Similar to previous best
## Run 3 stress 0.1015074
## Run 4 stress 0.1015071
## Run 5 stress 0.09010313
## ... New best solution
## ... Procrustes: rmse 0.03031164 max resid 0.09068644
## Run 6 stress 0.1019489
## Run 7 stress 0.0902035
## ... Procrustes: rmse 0.02054797 max resid 0.06154937
## Run 8 stress 0.09010323
## ... Procrustes: rmse 0.00007060126 max resid 0.0001571173
## ... Similar to previous best
## Run 9 stress 0.09286223
## Run 10 stress 0.1015077
## Run 11 stress 0.0901741
## ... Procrustes: rmse 0.03012659 max resid 0.08968475
## Run 12 stress 0.1015064
## Run 13 stress 0.09010451
## ... Procrustes: rmse 0.002570027 max resid 0.007123546
## ... Similar to previous best
## Run 14 stress 0.101949
## Run 15 stress 0.09286283
## Run 16 stress 0.09286261
## Run 17 stress 0.1015066
## Run 18 stress 0.1015074
## Run 19 stress 0.1019491
## Run 20 stress 0.1019491
## *** Best solution repeated 2 times
## Run 0 stress 0.09353877
## Run 1 stress 0.08863153
## ... New best solution
## ... Procrustes: rmse 0.06685004 max resid 0.1821596
## Run 2 stress 0.07697124
## ... New best solution
## ... Procrustes: rmse 0.04181677 max resid 0.140785
## Run 3 stress 0.07886751
## Run 4 stress 0.07886684
## Run 5 stress 0.0855555
## Run 6 stress 0.07606434
## ... New best solution
## ... Procrustes: rmse 0.0459476 max resid 0.1755527
## Run 7 stress 0.09101242
## Run 8 stress 0.07607725
## ... Procrustes: rmse 0.0110939 max resid 0.03852334
## Run 9 stress 0.09162134
## Run 10 stress 0.09175955
## Run 11 stress 0.07697059
## Run 12 stress 0.07886749
## Run 13 stress 0.07697089
## Run 14 stress 0.07886739
## Run 15 stress 0.07697175
## Run 16 stress 0.07697061
## Run 17 stress 0.09201912
## Run 18 stress 0.07606481
## ... Procrustes: rmse 0.000255875 max resid 0.0007155661
## ... Similar to previous best
## Run 19 stress 0.07593264
## ... New best solution
## ... Procrustes: rmse 0.02127687 max resid 0.09485999
## Run 20 stress 0.07607456
## ... Procrustes: rmse 0.01649236 max resid 0.06185298
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 3: no. of iterations >= maxit
## 17: stress ratio > sratmax
## Run 0 stress 0.09080181
## Run 1 stress 0.09080181
## ... Procrustes: rmse 0.00001590652 max resid 0.00005827989
## ... Similar to previous best
## Run 2 stress 0.09080181
## ... Procrustes: rmse 0.000007913822 max resid 0.00002322651
## ... Similar to previous best
## Run 3 stress 0.09080181
## ... Procrustes: rmse 0.00002436483 max resid 0.00006872031
## ... Similar to previous best
## Run 4 stress 0.09080186
## ... Procrustes: rmse 0.00004978257 max resid 0.000158404
## ... Similar to previous best
## Run 5 stress 0.09080183
## ... Procrustes: rmse 0.00004486318 max resid 0.0001214029
## ... Similar to previous best
## Run 6 stress 0.09080181
## ... Procrustes: rmse 0.0000107887 max resid 0.00003307432
## ... Similar to previous best
## Run 7 stress 0.09080182
## ... Procrustes: rmse 0.00003975655 max resid 0.0001130091
## ... Similar to previous best
## Run 8 stress 0.09080181
## ... Procrustes: rmse 0.00000321525 max resid 0.00001451934
## ... Similar to previous best
## Run 9 stress 0.09080181
## ... Procrustes: rmse 0.00002476848 max resid 0.00007265074
## ... Similar to previous best
## Run 10 stress 0.09080181
## ... Procrustes: rmse 0.000004157935 max resid 0.0000121865
## ... Similar to previous best
## Run 11 stress 0.09080181
## ... Procrustes: rmse 0.000006341339 max resid 0.00002330866
## ... Similar to previous best
## Run 12 stress 0.09080181
## ... Procrustes: rmse 0.00001736045 max resid 0.00005831044
## ... Similar to previous best
## Run 13 stress 0.09080185
## ... Procrustes: rmse 0.00006051381 max resid 0.0001648197
## ... Similar to previous best
## Run 14 stress 0.09080181
## ... Procrustes: rmse 0.00002405465 max resid 0.00007539443
## ... Similar to previous best
## Run 15 stress 0.09080181
## ... Procrustes: rmse 0.00001542878 max resid 0.00005081757
## ... Similar to previous best
## Run 16 stress 0.09080181
## ... Procrustes: rmse 0.000005907441 max resid 0.00002038276
## ... Similar to previous best
## Run 17 stress 0.09080181
## ... Procrustes: rmse 0.000007981256 max resid 0.00002994681
## ... Similar to previous best
## Run 18 stress 0.09080181
## ... Procrustes: rmse 0.000004474871 max resid 0.00001139932
## ... Similar to previous best
## Run 19 stress 0.09080181
## ... Procrustes: rmse 0.00001757614 max resid 0.00005194313
## ... Similar to previous best
## Run 20 stress 0.1092954
## *** Best solution repeated 19 times
# Crear una función para generar el gráfico NMDS con stress
plot_nmds <- function(nmds_result, location_name) {
scores_df <- as.data.frame(scores(nmds_result)) %>%
rownames_to_column(var = "SampleID") %>%
left_join(meta, by = "SampleID") %>%
mutate(Location = location_name)
p <- ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Source)) +
geom_point(size = 4) +
labs(title = paste("Location:", location_name, "\nStress:", round(stress_values[location_name], 5)),
x = "NMDS1", y = "NMDS2", color = "Source") +
theme_bw() +
scale_color_jco() +
theme(legend.position = "right", legend.box = "vertical")
return(p)
}
# Aplicar la función a cada resultado NMDS y guardar los plots
nmds_plots <- lapply(names(nmds_list), function(loc) {
plot_nmds(nmds_list[[loc]], loc)
})
# Usar cowplot para combinar los gráficos
nmds_combined_plot <- plot_grid(plotlist = nmds_plots, ncol = 3, align = 'v')
# Mostrar el gráfico combinado
print(nmds_combined_plot)# Guardar la gráfica combinada si es necesario
#ggsave("combined_NMDS_plot.png", nmds_combined_plot, width = 12, height = ​``【oaicite:0】``​meta_just = data.frame(aldex_clr_data, check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr) ## Joining with `by = join_by(`sample-id`)`
meta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
data=meta_just, method = "aitchison",
permutations = 999, strata = meta_just$Specimen)
permanovasPermanovas aitchison independientes
library(vegan)
library(dplyr)
library(tidyr)
# pseudocount
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
# Combine
meta_just <- metadata_with_hill_fltr %>%
dplyr::select(`sample-id`, Location, Source, Specimen) %>%
inner_join(as.data.frame(t(otu_table_fltr_pseudo)) %>% rownames_to_column(var = "sample-id"), by = "sample-id")
#
permanova_results <- list()
#
for (loc in unique(metadata_with_hill_fltr$Location)) {
meta_just_loc <- meta_just %>%
filter(Location == loc)
otu_table_loc <- otu_table_fltr_pseudo[, meta_just_loc$`sample-id`, drop = FALSE]
aitch_loc <- vegdist(t(otu_table_loc), method = "aitchison")
permanova_results[[loc]] <- adonis2(aitch_loc ~ Source,
data = meta_just_loc,
permutations = 999,
strata = meta_just_loc$Specimen)
}permanova_results## $`Monte obscuro Emiliano Zapata`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 1151
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 20333 0.25427 2.2163 0.001 ***
## Residual 13 59635 0.74573
## Total 15 79968 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`El Castillo/Zentla`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 15850 0.13282 1.7614 0.001 ***
## Residual 23 103480 0.86718
## Total 25 119330 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Naolinco
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 20640 0.13271 2.0658 0.001 ***
## Residual 27 134882 0.86729
## Total 29 155522 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Prepare data
Explore
microbiome::summarize_phyloseq(full_ps_core_counts)## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
##
## [[2]]
## [1] "2] Max. number of reads = 132023"
##
## [[3]]
## [1] "3] Total number of reads = 2296141"
##
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
##
## [[5]]
## [1] "5] Median number of reads = 26767.5"
##
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
#Get otu and metadata table of core phyloseq object
# extract otu table
otu_table_full_core <- otu_table(full_ps_core_counts@otu_table)
meta_full_core <- sample_data(full_ps_core_counts)
# Metadata
#meta_full_core <- data.frame(phyloseq::sample_data(pseq),check.names = FALSE)# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_full_core_pseudo <- otu_table_full_core + pseudocount
aitch_fc=vegdist(t(otu_table_full_core_pseudo), method = "aitchison")
nmds_source_aitch_fc = vegan::metaMDS(aitch_fc, trymax = 20, k = 2) ## Run 0 stress 0.1321064
## Run 1 stress 0.1843087
## Run 2 stress 0.1321064
## ... New best solution
## ... Procrustes: rmse 0.00001572868 max resid 0.0001123352
## ... Similar to previous best
## Run 3 stress 0.1546033
## Run 4 stress 0.1427431
## Run 5 stress 0.1426311
## Run 6 stress 0.1426311
## Run 7 stress 0.1688451
## Run 8 stress 0.1524233
## Run 9 stress 0.1700279
## Run 10 stress 0.1506315
## Run 11 stress 0.1519784
## Run 12 stress 0.1321064
## ... New best solution
## ... Procrustes: rmse 0.00000624541 max resid 0.00002068154
## ... Similar to previous best
## Run 13 stress 0.1470723
## Run 14 stress 0.1794383
## Run 15 stress 0.1321064
## ... Procrustes: rmse 0.000008990276 max resid 0.00005963758
## ... Similar to previous best
## Run 16 stress 0.1832279
## Run 17 stress 0.151375
## Run 18 stress 0.1426311
## Run 19 stress 0.1471084
## Run 20 stress 0.1321064
## ... Procrustes: rmse 0.00001190633 max resid 0.00007598868
## ... Similar to previous best
## *** Best solution repeated 3 times
var_stress_nmds_aitch_fc <- round(nmds_source_aitch_fc$stress, 5)
var_stress_nmds_aitch_fc## [1] 0.13211
scores_source_aitch_fc= nmds_source_aitch_fc %>% vegan::scores()
meta_full_core$Source <- factor(meta_full_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_aitch_fc<- ggplot() +
geom_point(data=data.frame(scores_source_aitch_fc) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(meta_full_core, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_aitch_fc <- nmds_plot_aitch_fc +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch_fc),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitch_fcmeta_core_pseudo = data.frame(t(otu_table_full_core_pseudo), check.names = F) %>%
rownames_to_column(var = "sample") %>% inner_join(meta_full_core) ## Joining with `by = join_by(sample)`
perm_core = adonis2(t(otu_table_full_core_pseudo) ~ Source*Location,
data=meta_core_pseudo, method = "aitchison",
permutations = 999, strata = meta_core_pseudo$Specimen)
perm_coremeta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
data=meta_just, method = "aitchison",
permutations = 999, strata = meta_just$Specimen)
permanovas##res prim tutorial https://www.yanh.org/2021/01/01/microbiome-r/
library(ANCOMBC)## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
####################################################################
#Run ANCOM-BC at the Family level and only including the prevalent genera
####################################################################
set.seed(123)
output_genus_full = ancombc2(data = tse,
assay_name = "counts",
tax_level = "Genus",
fix_formula = "Source+Location",
rand_formula = "(1|Specimen)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0.10,
lib_cut = 1000,
s0_perc = 0.05,
group = "Source",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = TRUE,
iter_control = list(tol = 1e-2, max_iter = 20,
verbose = TRUE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
nrow = 2,
byrow = TRUE),
matrix(c(-1, 0, 1, -1),
nrow = 2,
byrow = TRUE),
matrix(c(1, 0, 1, -1),
nrow = 2,
byrow = TRUE)),
node = list(2, 2, 1),
solver = "ECOS",
B = 10))## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Obtaining initial estimates ...
## REML iteration = 1, epsilon = 2.3
## REML iteration = 2, epsilon = 0.61
## REML iteration = 3, epsilon = 0.36
## REML iteration = 4, epsilon = 0.21
## REML iteration = 5, epsilon = 0.13
## REML iteration = 6, epsilon = 0.079
## REML iteration = 7, epsilon = 0.049
## REML iteration = 8, epsilon = 0.031
## REML iteration = 9, epsilon = 0.019
## REML iteration = 10, epsilon = 0.012
## REML iteration = 11, epsilon = 0.0078
## Estimating sample-specific biases ...
## ANCOM-BC2 primary results ...
## The sensitivity analysis for the pseudo-count addition ...
## ANCOM-BC2 global test ...
## ANCOM-BC2 pairwise directional test ...
## ANCOM-BC2 Dunnet's type of test ...
## ANCOM-BC2 trend test ...
save.image("src/Diversity.RData")